import json
import os

# Paths configuration
VANILLA_DB_DIR = r"D:\SteamLibrary\steamapps\common\Grim Dawn\database"
VANILLA_TEXT_DIR = r"D:\SteamLibrary\steamapps\common\Grim Dawn\resources\text_en"

# Target scan directory for Phase 1 ingestion
VANILLA_RECORDS_DIR = os.path.join(VANILLA_DB_DIR, "records")
JSON_OUTPUT_DIR = r".\vanilla_json_mirror"
TEXT_OUTPUT_DIR = os.path.join(JSON_OUTPUT_DIR, "resources", "text_en")


def parse_text_file(file_path):
    """Parse a single ``key=value`` text file into a dictionary.

    Each line is split at its first ``=``; the key and the value are stripped
    of surrounding whitespace. Lines without ``=`` (blank lines, free-form
    comments) are skipped, as are entries whose key or value is empty. When a
    key occurs more than once, the last occurrence wins.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A dict mapping each key to its (string) value.
    """
    entries = {}
    # "utf-8-sig" reads plain UTF-8 as well, but additionally drops a leading
    # byte-order mark so it cannot end up glued to the first key of the file.
    # Undecodable bytes are still dropped on purpose (best-effort ingestion).
    with open(file_path, "r", encoding="utf-8-sig", errors="ignore") as f:
        for raw_line in f:
            key, separator, value = raw_line.partition("=")
            if not separator:
                # No "=" at all: blank line or comment.
                continue

            key = key.strip()
            value = value.strip()

            # Only store complete pairs.
            if key and value:
                entries[key] = value

    return entries


def ingest_text_files(text_dir=None, output_dir=None):
    """Convert every ``.txt`` file of a text resource directory to JSON.

    Each ``<name>.txt`` directly inside ``text_dir`` is parsed with
    ``parse_text_file`` and written to ``<output_dir>/<name>.json``.
    Sub-directories are not descended into.

    Args:
        text_dir: Directory holding the ``.txt`` files. Defaults to
            ``VANILLA_TEXT_DIR`` (Grim Dawn ``resources/text_en``).
        output_dir: Directory receiving the JSON files; created when missing.
            Defaults to ``TEXT_OUTPUT_DIR``.

    Returns:
        The number of text files converted; 0 when ``text_dir`` is not an
        existing directory.
    """
    # Resolve defaults at call time so the module constants stay the single
    # source of truth for callers that pass nothing.
    if text_dir is None:
        text_dir = VANILLA_TEXT_DIR
    if output_dir is None:
        output_dir = TEXT_OUTPUT_DIR

    # isdir (not exists): a regular file at this path would otherwise make
    # os.listdir raise NotADirectoryError.
    if not os.path.isdir(text_dir):
        print(f"Warning: Text resources directory not found at: {text_dir}")
        return 0

    print(f"\nStarting text file ingestion from: {text_dir}")

    os.makedirs(output_dir, exist_ok=True)

    processed_files = 0

    # Sorted so the conversion order (and the log) is reproducible;
    # os.listdir returns entries in arbitrary order.
    for filename in sorted(os.listdir(text_dir)):
        if not filename.lower().endswith(".txt"):
            continue

        file_path = os.path.join(text_dir, filename)
        # A directory that merely happens to be named "*.txt" cannot be opened.
        if not os.path.isfile(file_path):
            continue

        text_data = parse_text_file(file_path)

        # Same base name, .json extension.
        json_filename = os.path.splitext(filename)[0] + ".json"
        json_output_path = os.path.join(output_dir, json_filename)

        with open(json_output_path, "w", encoding="utf-8") as json_file:
            json.dump(text_data, json_file, indent=2)

        processed_files += 1
        print(f"  Converted: {filename} -> {json_filename}")

    return processed_files