commit b964e4d5785e692f3eefc79830628958a721cf68
Author: deceivedhornet <henrique.nakashima+github@gmail.com>
Date:   Thu Jun 11 01:45:22 2026 -0400

    Create ingest_database.py

diff --git a/ingest_database.py b/ingest_database.py
new file mode 100644
index 0000000..715336a
--- /dev/null
+++ b/ingest_database.py
@@ -0,0 +1,95 @@
+import os
+import json
+import re
+
+# Paths configuration (Windows-style paths; edit to match the local install)
+VANILLA_DB_DIR = r"D:\SteamLibrary\steamapps\common\Grim Dawn\database\records"  # input tree walked for .dbr files
+JSON_OUTPUT_DIR = r".\vanilla_json_mirror"  # output root; relative to the current working directory
+
+def clean_dbr_value(val):
+    """Normalize a raw .dbr field value; return None for empty/zero fields.
+
+    Surrounding whitespace is stripped first and then any trailing run of
+    commas, spaces and tabs, so a value like "1.5, " does not keep its comma.
+    Returns None when the result is "", "0" or a zero decimal such as "0.0"
+    or "0.000000"; otherwise returns the cleaned string.
+    """
+    val = val.strip().rstrip(", \t")
+    if val in ("", "0") or re.fullmatch(r"0\.0+", val):
+        return None
+    return val
+
+def parse_dbr_file(file_path):
+    """Read one .dbr file and return its retained fields as a dict.
+
+    Every line is split at its first comma into a key and a raw value; lines
+    that are blank or have no comma are skipped, as are fields for which
+    clean_dbr_value() returns None. Undecodable bytes are silently ignored.
+    """
+    fields = {}
+    with open(file_path, "r", encoding="utf-8", errors="ignore") as handle:
+        for raw_line in handle:
+            # An empty separator means no comma was found (covers blank lines).
+            name, comma, remainder = raw_line.strip().partition(",")
+            if not comma:
+                continue
+            value = clean_dbr_value(remainder)
+            if value is not None:
+                fields[name.strip()] = value
+    return fields
+
+def main():
+    """Mirror the vanilla .dbr tree as one sparse JSON file per directory.
+
+    Walks VANILLA_DB_DIR and, for every folder that directly contains .dbr
+    files, writes <JSON_OUTPUT_DIR>/<relative path>/<folder name>.json
+    mapping each filename to the dict returned by parse_dbr_file(). Files in
+    the top-level folder go to root.json. Prints an error and returns early
+    when VANILLA_DB_DIR is not an existing directory.
+    """
+    # isdir() rather than exists(): if the path were a regular file, os.walk
+    # would yield nothing and the run would falsely report success.
+    if not os.path.isdir(VANILLA_DB_DIR):
+        print(f"Error: Vanilla database directory not found at: {VANILLA_DB_DIR}")
+        return
+
+    print(f"Starting database ingestion from: {VANILLA_DB_DIR}")
+    print("Grouping files by directory level...")
+
+    processed_directories = 0
+    total_files_mapped = 0
+
+    for root, dirs, files in os.walk(VANILLA_DB_DIR):
+        # Sort the subdirectory list in place (this steers os.walk) and the
+        # filenames so traversal and JSON key order are reproducible.
+        dirs.sort()
+        dbr_files = sorted(f for f in files if f.lower().endswith(".dbr"))
+        if not dbr_files:
+            continue
+
+        # Map filename -> sparse content. Files that end up empty after
+        # cleaning are kept so the compiler knows they exist in vanilla.
+        directory_map = {
+            filename: parse_dbr_file(os.path.join(root, filename))
+            for filename in dbr_files
+        }
+        total_files_mapped += len(directory_map)
+
+        # Recreate the source folder structure under the output directory.
+        rel_path = os.path.relpath(root, VANILLA_DB_DIR)
+        target_output_dir = os.path.join(JSON_OUTPUT_DIR, rel_path)
+        os.makedirs(target_output_dir, exist_ok=True)
+        # Name the JSON after its folder, e.g. .../items/gearfeet/ becomes
+        # .../items/gearfeet/gearfeet.json; the top-level folder uses "root".
+        folder_name = os.path.basename(root) if rel_path != "." else "root"
+        json_output_path = os.path.join(target_output_dir, f"{folder_name}.json")
+        with open(json_output_path, "w", encoding="utf-8") as json_file:
+            json.dump(directory_map, json_file, indent=2)
+        processed_directories += 1
+
+    print("\nSuccess! Phase 1 Ingestion Complete.")
+    print(f"Processed {processed_directories} directories.")
+    print(f"Mapped a total of {total_files_mapped} files into sparse JSON endpoints.")
+
+if __name__ == "__main__":  # run the ingestion only when executed as a script, not on import
+    main()