# deltadawn/ingest_database.py

import os
import json
import re

# Paths configuration
# Source locations are hard-coded absolute paths into a local Grim Dawn install.
VANILLA_DB_DIR = r"D:\SteamLibrary\steamapps\common\Grim Dawn\database"
VANILLA_TEXT_DIR = r"D:\SteamLibrary\steamapps\common\Grim Dawn\resources\text_en"

# Target scan directory for Phase 1 ingestion
VANILLA_RECORDS_DIR = os.path.join(VANILLA_DB_DIR, "records")

# Root of the generated JSON mirror. The path is relative, so it resolves
# against the current working directory at run time.
JSON_OUTPUT_DIR = r".\vanilla_json_mirror"
# Converted text resources are written below the mirror root.
TEXT_OUTPUT_DIR = os.path.join(JSON_OUTPUT_DIR, "resources", "text_en")

def clean_dbr_value(val):
    """
    Strip trailing delimiter commas and surrounding whitespace from a raw value.

    Returns None if the value is a standard engine zero-default or empty field
    ("", "0", or "0." followed only by zeros such as "0.0" / "0.000000"),
    otherwise returns the cleaned string value.
    """
    # Strip whitespace BEFORE removing commas: otherwise a trailing comma that
    # is followed by spaces or a newline (e.g. "1.5, ") is never seen by
    # rstrip(",") and leaks into the result.
    val = val.strip().rstrip(",").strip()

    # Filter out empty fields or zero defaults (0, 0.0, 0.000000)
    if val in ("", "0") or re.fullmatch(r"0\.0+", val):
        return None
    return val

def parse_text_file(file_path):
    """Parse a single .txt file with key=value format into a dictionary.

    Each line is split at its first "=". Whitespace around the key and the
    value is removed. Lines without "=" (blank lines, comments) and entries
    whose key or value is empty are skipped. If a key occurs more than once,
    the last occurrence wins.

    Returns a dict mapping key strings to value strings.
    """
    file_data = {}
    # "utf-8-sig" decodes exactly like "utf-8" but drops a leading byte-order
    # mark; with plain "utf-8" the BOM would stay attached to the first key,
    # because str.strip() does not treat U+FEFF as whitespace.
    with open(file_path, "r", encoding="utf-8-sig", errors="ignore") as f:
        for line in f:
            # Split at the first "=" only, so values may themselves contain "=".
            key, separator, value = line.partition("=")
            if not separator:
                # No "=" on this line: blank line or comment.
                continue

            key = key.strip()
            value = value.strip()

            # Only store entries with a non-empty key and a non-empty value.
            if key and value:
                file_data[key] = value

    return file_data

def ingest_text_files():
    """Convert every .txt file directly inside VANILLA_TEXT_DIR into a JSON file.

    Each source file is parsed with parse_text_file and written to
    TEXT_OUTPUT_DIR under the same base name with a .json extension.

    Returns the number of files converted, or 0 (after printing a warning)
    when VANILLA_TEXT_DIR does not exist.
    """
    if not os.path.exists(VANILLA_TEXT_DIR):
        print(f"Warning: Text resources directory not found at: {VANILLA_TEXT_DIR}")
        return 0

    print(f"\nStarting text file ingestion from: {VANILLA_TEXT_DIR}")

    # Make sure the destination exists before any file is written.
    os.makedirs(TEXT_OUTPUT_DIR, exist_ok=True)

    # Only the top level of the directory is scanned; the extension check is
    # case-insensitive.
    txt_names = [
        entry for entry in os.listdir(VANILLA_TEXT_DIR)
        if entry.lower().endswith('.txt')
    ]

    for source_name in txt_names:
        parsed = parse_text_file(os.path.join(VANILLA_TEXT_DIR, source_name))

        # Same base name, .json extension.
        stem, _extension = os.path.splitext(source_name)
        target_name = stem + ".json"

        with open(os.path.join(TEXT_OUTPUT_DIR, target_name), "w", encoding="utf-8") as out:
            json.dump(parsed, out, indent=2)

        print(f"  Converted: {source_name} -> {target_name}")

    return len(txt_names)

def parse_dbr_file(file_path):
    """Read one .dbr file and return its retained fields as a sparse dict.

    Every line is split at its first comma into a key and a raw value. The raw
    value is passed through clean_dbr_value, and the entry is kept only when
    that call does not return None. Lines without a comma are skipped.
    """
    entries = {}
    with open(file_path, "r", encoding="utf-8", errors="ignore") as handle:
        for raw_line in handle:
            # Only the first comma separates the key from the rest of the line.
            field, comma, remainder = raw_line.strip().partition(",")
            if not comma:
                # Blank line, or a line with no key/value delimiter.
                continue

            value = clean_dbr_value(remainder)
            if value is None:
                continue

            entries[field.strip()] = value

    return entries

def main():
    """Run the full ingestion: DBR records (Phase 1), then text resources (Phase 2).

    Phase 1 walks VANILLA_RECORDS_DIR. For every directory that directly
    contains .dbr files it writes one ``records.json`` into the matching
    directory under JSON_OUTPUT_DIR. That file maps each record's normalized
    path (forward slashes, relative to VANILLA_DB_DIR) to the sparse dict
    returned by parse_dbr_file.

    Phase 2 delegates to ingest_text_files.

    Prints an error and returns early if VANILLA_RECORDS_DIR does not exist.
    """
    if not os.path.exists(VANILLA_RECORDS_DIR):
        print(f"Error: Vanilla records directory not found at: {VANILLA_RECORDS_DIR}")
        return

    print(f"Starting database ingestion from: {VANILLA_RECORDS_DIR}")
    print("Grouping files by directory level...")

    processed_directories = 0
    total_files_mapped = 0

    # Walk the directory tree
    for root, _dirs, files in os.walk(VANILLA_RECORDS_DIR):
        # Filter for only .dbr files in the current folder
        dbr_files = [f for f in files if f.lower().endswith('.dbr')]

        if not dbr_files:
            continue

        # Path of this folder relative to the base database directory. It is
        # the same for every file in the folder, so compute it once. It is
        # taken relative to VANILLA_DB_DIR (not the records dir) so that keys
        # and mirror paths keep their leading "records" component.
        rel_path = os.path.relpath(root, VANILLA_DB_DIR)

        # Map of normalized record path -> sparse content for this folder.
        # A file is recorded even if it is completely empty after cleaning,
        # so the compiler knows the file exists in vanilla.
        directory_map = {}
        for filename in dbr_files:
            # Reconstruct the standard internal game engine path format
            # e.g., "records/skills/playerclass01/willtolive1.dbr"
            normalized_game_key = os.path.join(rel_path, filename).replace(os.sep, "/")
            directory_map[normalized_game_key] = parse_dbr_file(os.path.join(root, filename))

        total_files_mapped += len(dbr_files)

        # Recreate the source folder structure under the output mirror.
        target_output_dir = os.path.join(JSON_OUTPUT_DIR, rel_path)
        os.makedirs(target_output_dir, exist_ok=True)

        # Every folder's consolidated map uses the same fixed filename,
        # e.g. .../items/gearfeet/ becomes .../items/gearfeet/records.json
        json_output_path = os.path.join(target_output_dir, "records.json")

        # Save out the consolidated directory map
        with open(json_output_path, "w", encoding="utf-8") as json_file:
            json.dump(directory_map, json_file, indent=2)

        processed_directories += 1

    print("\nPhase 1 Complete (Records).")
    print(f"Processed {processed_directories} directories.")
    print(f"Mapped a total of {total_files_mapped} files into sparse JSON endpoints.")

    # Phase 2: Ingest text files
    text_files_processed = ingest_text_files()
    print("\nPhase 2 Complete (Text Resources).")
    print(f"Converted {text_files_processed} text files to JSON.")

    print("\nSuccess! Full Ingestion Complete.")

# Run the ingestion only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()