import json
import uuid
from typing import List, Dict
import argparse
import shutil

# ---------------------------
# Helper functions
# ---------------------------

def load_json_data(filepath: str):
    """Load JSON file from disk."""
    with open(filepath, 'r', encoding='utf-8') as f:
        return json.load(f)

def get_img_path(data: List[dict]):
    """Extract image paths from a single record."""
    return [item["image"] for item in data[1].get("content", []) if item.get("type") == "image"]

# def get_conversations(data: List[dict]):
#     """Extract conversations in desired format."""
#     conversation_data = []
#     for item in data:
#         if item.get("role") == "system":
#             conversation_data.append({"from": "system", "value": item.get("content")})
#         elif item.get("role") == "user":
#             texts = [x["text"] for x in item.get("content", []) if x.get("type") == "text"][0]
#             conversation_data.append({"from": "human", "value": texts})
#         elif item.get("role") == "assistant" or item.get("role") == "assistant_gt":
#             #texts = [x["text"] for x in item.get("content", []) if x.get("type") == "text"][0]
#             #conversation_data.append({"from": "gpt", "value": texts})
#             conversation_data.append({"from": "gpt", "value": item.get("content")})
#     return conversation_data

def get_conversations_v2(data: List[dict]) -> List[Dict[str, str]]:
    """Extract conversations in desired format, handling multiple text items."""
    conversation_data = []
    
    for item in data: 
        role = item.get("role")
        
        if role == "system":
            conversation_data.append({"from": "system", "value": item.get("content")})
            
        elif role == "user":
            texts = [x["text"] for x in item.get("content", []) if x.get("type") == "text"]

            if texts: 
                conversation_data.append({"from": "human", "value": texts[0]})
                
        elif role in ["assistant", "assistant_gt"]:
            content = item.get("content")

            if isinstance(content, list): # list of dicts
                texts = [x["text"] for x in content if x.get("type") == "text"]
                if texts:
                    conversation_data.append({"from": "gpt", "value": texts[0]})
            
            elif isinstance(content, str): # single string
                conversation_data.append({"from": "gpt", "value": content})
            
            else: # raw content
                conversation_data.append({"from": "gpt", "value": str(content)})
    return conversation_data

def convert(images: List[str] = [], conversation: List[Dict[str,str]] = []):
    """Convert raw data into docai_mgp_facture_data instance."""
    new_data = docai_mgp_facture_data()
    new_data.id = str(uuid.uuid4())
    new_data.images["images"] = images
    new_data.conversations["conversations"] = conversation
    return new_data

# ---------------------------
# Data class
# ---------------------------

class docai_mgp_facture_data:
    id: str
    images: Dict[str, List[str]]
    conversations: Dict[str, List[Dict[str,str]]]

    def __init__(self):
        self.id = ""
        self.images = {"images": []}
        self.conversations = {"conversations": [{"from": "", "value": ""}]}

    def display_data(self):
        print("Current data in instance:")
        print(f"ID: {self.id}")
        print("Images:")
        for img in self.images.get("images", []):
            print(f" - {img}")
        print("Conversations:")
        for conv in self.conversations.get("conversations", []):
            print(f" - from: {conv.get('from')}, value: {conv.get('value')}")

    def write_to_json(self, filename: str):
        """Write the current instance data to a JSON file (overwrite)."""
        data_dict = {
            "id": self.id,
            "images": self.images["images"],
            "conversations": self.conversations["conversations"]
        }
        with open(filename, "w", encoding="utf-8") as f:
            json.dump(data_dict, f, ensure_ascii=False, indent=4)
        print(f"Data written to {filename}")


def main() -> None:
    '''
    Input: one or more JSON files path
    Output: one JSON file under conversation format
    
    Ex: python3 ../convert_conversation_json.py \
                --source_path data1.json data2.json ... \
                --destination_path dest_path.json
    '''
    parser = argparse.ArgumentParser(description="Convert one or more JSON files to conversation-form JSON.")
    parser.add_argument(
        "--source_path",
        type=str,
        nargs='+',  # allow multiple files
        required=True,
        help="Path(s) to the source JSON file."
    )
    parser.add_argument(
        "--destination_path",
        type=str,
        required=True,
        help="Path to the destination JSON file."
    )
    args = parser.parse_args()

    all_data = []

    for source_path in args.source_path:  # match the argument name
        source_data = load_json_data(source_path)
        for record_data in source_data:
            images = get_img_path(record_data)
            conversations = get_conversations_v2(record_data)
            record = convert(images=images, conversation=conversations)
            all_data.append({
                "id": record.id,
                "images": record.images["images"],
                "conversations": record.conversations["conversations"]
            })

    with open(args.destination_path, "w", encoding="utf-8") as f:
        json.dump(all_data, f, ensure_ascii=False, indent=4)

    print(f"✅ All data from {len(args.source_path)} file(s) saved to {args.destination_path}")

# ---------------------------
# Main script
# ---------------------------

if __name__ == "__main__":
    main()