Also ingest resources/text_en text files into .json files

2026-06-11 03:05:47 -04:00 · 2026-06-11 03:05:47 -04:00 · 2ce6e81f10
commit 2ce6e81f10
parent d80b9d56c8
1 changed files with 67 additions and 1 deletions
--- a/ingest_database.py
+++ b/ingest_database.py
@@ -4,11 +4,13 @@ import re

 # Paths configuration
 VANILLA_DB_DIR = r"D:\SteamLibrary\steamapps\common\Grim Dawn\database"
+VANILLA_TEXT_DIR = r"D:\SteamLibrary\steamapps\common\Grim Dawn\resources\text_en"  # scanned for .txt files by ingest_text_files()

 # Target scan directory for Phase 1 ingestion
 VANILLA_RECORDS_DIR = os.path.join(VANILLA_DB_DIR, "records")

 JSON_OUTPUT_DIR = r".\vanilla_json_mirror"
+TEXT_OUTPUT_DIR = os.path.join(JSON_OUTPUT_DIR, "resources", "text_en")  # ingest_text_files() writes its .json output here

 def clean_dbr_value(val):
    """
@@ -23,6 +25,63 @@ def clean_dbr_value(val):
        return None
    return val

+def parse_text_file(file_path):
+    """Parse a ``key=value`` text file into a dict of stripped strings.
+
+    Lines with no ``=`` are skipped; that covers blank lines and comments only
+    when the comment itself contains no ``=``. Entries whose key or value is
+    empty after stripping are dropped. The split is at the first ``=``, so
+    values may contain ``=``; when a key repeats, the last occurrence wins.
+    """
+    file_data = {}
+    # utf-8-sig drops a leading byte-order mark; plain utf-8 would leave
+    # "\ufeff" glued to the first key of a BOM-prefixed file.
+    with open(file_path, "r", encoding="utf-8-sig", errors="ignore") as f:
+        for line in f:
+            # partition() splits at the first "=" and yields an empty
+            # separator when there is none, so no membership pre-check.
+            key, sep, value = line.partition("=")
+            key, value = key.strip(), value.strip()
+            if sep and key and value:
+                file_data[key] = value
+    return file_data
+
+def ingest_text_files():
+    """Convert every top-level .txt file in VANILLA_TEXT_DIR to JSON.
+
+    Each file is parsed with parse_text_file() and written to TEXT_OUTPUT_DIR
+    under the same base name with a .json extension. Subdirectories are not
+    descended into.
+
+    Returns:
+        The number of files converted; 0 (after printing a warning) when
+        VANILLA_TEXT_DIR is not an existing directory.
+    """
+    # isdir rather than exists: a plain file at this path would otherwise get
+    # past the guard and make os.listdir() raise.
+    if not os.path.isdir(VANILLA_TEXT_DIR):
+        print(f"Warning: Text resources directory not found at: {VANILLA_TEXT_DIR}")
+        return 0
+
+    print(f"\nStarting text file ingestion from: {VANILLA_TEXT_DIR}")
+    os.makedirs(TEXT_OUTPUT_DIR, exist_ok=True)
+    processed_files = 0
+
+    # Sorted so the progress log is in the same order on every run;
+    # os.listdir() returns entries in arbitrary order.
+    for filename in sorted(os.listdir(VANILLA_TEXT_DIR)):
+        file_path = os.path.join(VANILLA_TEXT_DIR, filename)
+        # A directory whose name ends in .txt cannot be opened as a file.
+        if not filename.lower().endswith(".txt") or not os.path.isfile(file_path):
+            continue
+        json_filename = os.path.splitext(filename)[0] + ".json"
+        json_output_path = os.path.join(TEXT_OUTPUT_DIR, json_filename)
+        with open(json_output_path, "w", encoding="utf-8") as json_file:
+            json.dump(parse_text_file(file_path), json_file, indent=2)
+        processed_files += 1
+        print(f"  Converted: {filename} -> {json_filename}")
+    return processed_files
+
 def parse_dbr_file(file_path):
    """Parses a single .dbr file into a clean, sparse key-value dictionary."""
    file_data = {}
@@ -101,9 +160,16 @@ def main():
            
        processed_directories += 1

-    print(f"\nSuccess! Phase 1 Ingestion Complete.")
+    print(f"\nPhase 1 Complete (Records).")
    print(f"Processed {processed_directories} directories.")
    print(f"Mapped a total of {total_files_mapped} files into sparse JSON endpoints.")
+    
+    # Phase 2: Ingest text files
+    text_files_processed = ingest_text_files()
+    print(f"\nPhase 2 Complete (Text Resources).")
+    print(f"Converted {text_files_processed} text files to JSON.")
+    
+    print(f"\nSuccess! Full Ingestion Complete.")

 if __name__ == "__main__":
    main()