# Origin: ingest_database.py, added in commit
# b964e4d5785e692f3eefc79830628958a721cf68 ("Create ingest_database.py",
# deceivedhornet, Thu Jun 11 01:45:22 2026 -0400).
"""Mirror a tree of ``.dbr`` record files as sparse, per-directory JSON files.

Every directory under the database root that directly contains at least one
``.dbr`` file produces one JSON file in the output mirror. That JSON file maps
each ``.dbr`` filename to a dictionary of the record's fields, with empty and
zero-valued fields left out.
"""

import json
import os
import re

# Paths configuration
VANILLA_DB_DIR = r"D:\SteamLibrary\steamapps\common\Grim Dawn\database\records"
JSON_OUTPUT_DIR = r".\vanilla_json_mirror"

# Float-formatted zero such as "0.0" or "0.000000" (used with fullmatch).
_ZERO_FLOAT_RE = re.compile(r"0\.0+")


def clean_dbr_value(val):
    """Return the cleaned field value, or None when it is empty or zero.

    Surrounding whitespace and trailing commas are removed. Whitespace is
    stripped *before* the commas so that input such as ``"0, "`` is still
    recognised as a zero default instead of surviving as ``"0,"``.

    Args:
        val: Raw text found after the key's comma on a record line.

    Returns:
        The cleaned string, or None if the result is ``""``, ``"0"`` or a
        float-formatted zero (``0.0``, ``0.000000``, ...).
    """
    cleaned = val.strip().rstrip(",").strip()

    # Filter out empty fields or zero defaults (0, 0.0, 0.000000)
    if cleaned in ("", "0") or _ZERO_FLOAT_RE.fullmatch(cleaned):
        return None
    return cleaned


def parse_dbr_file(file_path):
    """Parse a single .dbr file into a sparse key-value dictionary.

    Each line is expected to look like ``key,value,``. Lines without a comma
    (including blank lines) are skipped, and fields whose value cleans to
    None are omitted. When a key occurs more than once the last value wins.

    Args:
        file_path: Path of the ``.dbr`` file to read.

    Returns:
        A dict mapping field names to their cleaned string values.
    """
    file_data = {}
    # "utf-8-sig" drops a leading byte-order mark, which would otherwise be
    # glued to the first key; undecodable bytes are still ignored.
    with open(file_path, "r", encoding="utf-8-sig", errors="ignore") as f:
        for line in f:
            # Split exactly at the first comma to isolate the key; the value
            # itself may legitimately contain further commas.
            key, sep, rest = line.strip().partition(",")
            if not sep:
                continue

            cleaned_val = clean_dbr_value(rest)
            if cleaned_val is not None:
                file_data[key.strip()] = cleaned_val

    return file_data


def main(db_dir=None, output_dir=None):
    """Walk the record tree and write one consolidated JSON file per folder.

    Args:
        db_dir: Root directory of the ``.dbr`` records. Defaults to
            ``VANILLA_DB_DIR`` when None.
        output_dir: Root directory of the JSON mirror. Defaults to
            ``JSON_OUTPUT_DIR`` when None.

    Returns:
        None. Progress and the final summary are printed to stdout. If
        ``db_dir`` is not an existing directory, an error is printed and
        nothing is written.
    """
    # Resolve defaults at call time so the module constants stay the single
    # source of truth for the no-argument invocation.
    db_dir = VANILLA_DB_DIR if db_dir is None else db_dir
    output_dir = JSON_OUTPUT_DIR if output_dir is None else output_dir

    # isdir (not exists): a regular file at this path would make os.walk
    # yield nothing and the run would falsely report success.
    if not os.path.isdir(db_dir):
        print(f"Error: Vanilla database directory not found at: {db_dir}")
        return

    print(f"Starting database ingestion from: {db_dir}")
    print("Grouping files by directory level...")

    processed_directories = 0
    total_files_mapped = 0

    # Walk the directory tree
    for root, dirs, files in os.walk(db_dir):
        # Sorting in place makes the traversal order deterministic.
        dirs.sort()

        # Only .dbr files in the current folder, sorted so the generated
        # JSON is identical from run to run.
        dbr_files = sorted(f for f in files if f.lower().endswith(".dbr"))
        if not dbr_files:
            continue

        # filename -> sparse content. A file is recorded even when it is
        # completely empty after cleaning, so the mirror still shows that
        # the file exists.
        directory_map = {
            filename: parse_dbr_file(os.path.join(root, filename))
            for filename in dbr_files
        }
        total_files_mapped += len(dbr_files)

        # Recreate the relative folder structure inside the mirror.
        rel_path = os.path.relpath(root, db_dir)
        target_output_dir = os.path.join(output_dir, rel_path)
        os.makedirs(target_output_dir, exist_ok=True)

        # Use the folder's own name as the JSON filename,
        # e.g. .../items/gearfeet/ becomes .../items/gearfeet/gearfeet.json;
        # the top-level folder is written as root.json.
        folder_name = os.path.basename(root) if rel_path != "." else "root"
        json_output_path = os.path.join(target_output_dir, f"{folder_name}.json")

        # Save out the consolidated directory map
        with open(json_output_path, "w", encoding="utf-8") as json_file:
            json.dump(directory_map, json_file, indent=2)

        processed_directories += 1

    print("\nSuccess! Phase 1 Ingestion Complete.")
    print(f"Processed {processed_directories} directories.")
    print(f"Mapped a total of {total_files_mapped} files into sparse JSON endpoints.")


if __name__ == "__main__":
    main()