Also ingest resources/text_en text files into JSON files

2026-06-11 03:05:47 -04:00 · 2026-06-11 03:05:47 -04:00 · 2ce6e81f10
commit 2ce6e81f10
parent d80b9d56c8
1 changed files with 67 additions and 1 deletions
--- a/ingest_database.py
+++ b/ingest_database.py
@ -4,11 +4,13 @@ import re
 # Paths configuration
 # Absolute, machine-specific locations of the game data that gets ingested.
 VANILLA_DB_DIR = r"D:\SteamLibrary\steamapps\common\Grim Dawn\database"
 VANILLA_TEXT_DIR = r"D:\SteamLibrary\steamapps\common\Grim Dawn\resources\text_en"
 # Target scan directory for Phase 1 ingestion
 VANILLA_RECORDS_DIR = os.path.join(VANILLA_DB_DIR, "records")
 # Root of the generated JSON output; a relative path, so it resolves against
 # the current working directory at run time.
 JSON_OUTPUT_DIR = r".\vanilla_json_mirror"
 # Where the JSON files converted from VANILLA_TEXT_DIR are written.
 TEXT_OUTPUT_DIR = os.path.join(JSON_OUTPUT_DIR, "resources", "text_en")
 def clean_dbr_value(val):
    """
@ -23,6 +25,63 @@ def clean_dbr_value(val):
        return None
    return val
 def parse_text_file(file_path):
    """Parse a ``key=value`` text file into a dictionary.

    Every line is split at its first ``=`` so that values may themselves
    contain ``=``. Blank lines and lines without ``=`` are skipped. Keys and
    values are whitespace-stripped, and pairs whose key or value is empty
    after stripping are dropped. When a key occurs more than once, the last
    occurrence wins.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A dict mapping each key string to its value string.
    """
    file_data = {}
    # "utf-8-sig" decodes exactly like "utf-8" but also discards a leading
    # byte-order mark. With plain "utf-8" the BOM survives as U+FEFF, which
    # str.strip() does not remove, so it would end up glued to the first key.
    with open(file_path, "r", encoding="utf-8-sig", errors="ignore") as f:
        for line in f:
            # partition() yields an empty separator when the line has no "=",
            # which covers blank lines and "="-less comment lines alike.
            key, separator, value = line.partition("=")
            if not separator:
                continue
            key = key.strip()
            value = value.strip()
            # Only store pairs where both sides are non-empty
            if key and value:
                file_data[key] = value
    return file_data
 def ingest_text_files():
    """Convert each .txt file directly inside VANILLA_TEXT_DIR to a JSON file.

    Every matching file is parsed with parse_text_file() and the resulting
    dictionary is written to TEXT_OUTPUT_DIR under the same base name with a
    ``.json`` extension. Subdirectories are not descended into.

    Returns:
        The number of files converted, or 0 when VANILLA_TEXT_DIR is missing.
    """
    # Without a source directory there is nothing to convert: warn and bail.
    if not os.path.exists(VANILLA_TEXT_DIR):
        print(f"Warning: Text resources directory not found at: {VANILLA_TEXT_DIR}")
        return 0
    print(f"\nStarting text file ingestion from: {VANILLA_TEXT_DIR}")
    # Make sure the destination exists before the first write
    os.makedirs(TEXT_OUTPUT_DIR, exist_ok=True)
    converted = 0
    for entry in os.listdir(VANILLA_TEXT_DIR):
        # Extension match is case-insensitive; everything else is ignored
        if entry.lower().endswith('.txt'):
            parsed = parse_text_file(os.path.join(VANILLA_TEXT_DIR, entry))
            # Same base name, with the final extension swapped for .json
            base_name, _ = os.path.splitext(entry)
            json_filename = base_name + ".json"
            destination = os.path.join(TEXT_OUTPUT_DIR, json_filename)
            with open(destination, "w", encoding="utf-8") as out:
                json.dump(parsed, out, indent=2)
            converted += 1
            print(f"  Converted: {entry} -> {json_filename}")
    return converted
 def parse_dbr_file(file_path):
    """Parses a single .dbr file into a clean, sparse key-value dictionary."""
    file_data = {}
@ -101,9 +160,16 @@ def main():
        processed_directories += 1
-    print(f"\nSuccess! Phase 1 Ingestion Complete.")
+    print(f"\nPhase 1 Complete (Records).")
    print(f"Processed {processed_directories} directories.")
    print(f"Mapped a total of {total_files_mapped} files into sparse JSON endpoints.")
    # Phase 2: Ingest text files
    text_files_processed = ingest_text_files()
    print(f"\nPhase 2 Complete (Text Resources).")
    print(f"Converted {text_files_processed} text files to JSON.")
    print(f"\nSuccess! Full Ingestion Complete.")
 # Run the ingestion only when this file is executed as a script, not when
 # it is imported as a module.
 if __name__ == "__main__":
    main()