# 2025-05-30 - Added fix for save/load routines crashing
# 2025-05-29 - Created by Nemesis
# This patch modifies HOBBIT.DAT from the 1984 DOS release of "The Hobbit" to fix a prefetch issue causing
# the game to crash on 386 or newer CPUs. This results in the game locking up as soon as it brings up the text
# interface, possibly with the text "You are an" being the only thing shown. The game was originally written
# for the 8088, which had a prefetch queue of 6 bytes. When the 386 came out, that increased to 16 bytes. The
# Hobbit used code which modified instructions only a few bytes ahead, with two NOP instructions (0x90) inserted
# between them to just reach the 6 bytes space required. This was insufficient on the 386, and the old data was
# stuck in the prefetch queue, causing the code to malfunction. This fix replaces the two successive NOP
# instructions with a branch instruction past itself. Since branches invalidate the prefetch queue, this flushes
# the old data from the queue and ensures the newly modified opcodes are read in instead.
#
# Additionally, it was noticed that the "SAVE" and "LOAD" routines in this game intermittently crash. This
# occurs because the 32-bit EDX register was being used inside the COM loader for this game (HOBBIT.COM) as a
# 32-bit pointer, but only the lower 16 bits of the register were being initialized. This caused crashes when
# the upper 16 bits contained a non-zero value. This is patched by inserting a new routine that uses a 16-bit
# memory reference, saving and restoring values on the stack to ensure the register state is preserved.
import sys
import re
import shutil
from pathlib import Path

def patchDatFileIfRequired(filePath):
    """Patch the self-modifying code in the given HOBBIT.DAT file, if it has not been patched yet.

    Finds code that looks like this:
        mov     si, [????]
        mov     [targetOpcode+1], si
        nop
        nop
    targetOpcode:
        mov     bx, 0
    and changes it to this:
        mov     si, [????]
        mov     [targetOpcode+1], si
       *jmp*    targetOpcode
    targetOpcode:
        mov     bx, 0

    Two instances are expected. If at least one instance is patched, a backup copy of the original file is
    created next to it (last character of the extension replaced by "_", e.g. HOBBIT.DA_) unless that backup
    already exists, and the file is rewritten in place.

    Args:
        filePath: Path of the file to check and patch.

    Returns:
        None. Progress is reported on standard output.
    """
    print("Checking {}".format(filePath))
    filePath = Path(filePath)
    with open(filePath, 'rb') as targetFile:
        fileData = targetFile.read()

    # Match on the raw bytes rather than on a hex dump of them: a hex-string search can start in the middle
    # of a byte and rewrite unrelated data. DOTALL lets "." stand for any operand byte, including 0x0A.
    loadAndStore = rb"(\x8B\x36..\x89\x36..)"
    targetOpcode = rb"(\xBB\x00\x00)"
    patchedCode = re.compile(loadAndStore + rb"\xEB\x00" + targetOpcode, re.DOTALL)
    unpatchedCode = re.compile(loadAndStore + rb"\x90\x90" + targetOpcode, re.DOTALL)

    if patchedCode.search(fileData):
        print("Polymorphic code prefetch cache issue already patched")
        return

    # Swap the two NOPs (90 90) for a short jump to the next instruction (EB 00).
    patchedData, replaceCount = unpatchedCode.subn(
        lambda match: match.group(1) + b"\xEB\x00" + match.group(2), fileData)
    if replaceCount == 0:
        return
    print("Patched polymorphic code prefetch cache issue")

    # Create a backup file if none exists
    backupFilePath = filePath.with_suffix(filePath.suffix[:-1] + "_")
    if not backupFilePath.is_file():
        shutil.copy2(filePath, backupFilePath)

    # Write the modified file data
    with open(filePath, 'wb') as outputFile:
        outputFile.write(patchedData)

def patchComFileIfRequired(filePath):
    """Patch the save/load routines in the given HOBBIT.COM file, if it has not been patched yet.

    Both routines contain this sequence, which addresses memory through the 32-bit EDX register:
        mov     [edx], al
        add     ah, 30h ; '0'
        mov     [edx+1], ah
    followed by "mov ah, 3Ch" (save) or "mov ax, 3D02h" (load). Each sequence is replaced by:
       *call*   348h
       *nop*    (x7)
    and the following fix function is added directly after the 'Dos driver by Mok.',0 string:
       *push*   si
       *mov*    si, dx
       *mov*    byte ptr [si], al
       *add*    ah, 0x30
       *mov*    byte ptr [si + 1], ah
       *pop*    si
       *ret*

    Only files of exactly 0x248 bytes are touched, because the inserted calls use a fixed offset to the
    added routine. If anything was patched, a backup copy of the original file is created next to it (last
    character of the extension replaced by "_", e.g. HOBBIT.CO_) unless that backup already exists, and the
    file is rewritten in place.

    Args:
        filePath: Path of the file to check and patch.

    Returns:
        None. Progress is reported on standard output.
    """
    print("Checking {}".format(filePath))
    filePath = Path(filePath)
    with open(filePath, 'rb') as targetFile:
        fileData = targetFile.read()

    # Validate the file size. Since we're appending a routine with an absolute offset, we need to make sure
    # we're adding it at the expected location.
    expectedFileSize = 0x248
    fixFunction = bytes.fromhex("5689D6880480C4308864015EC3")
    if len(fileData) == expectedFileSize + len(fixFunction):
        print("File already patched")
        return
    if len(fileData) != expectedFileSize:
        print("File wasn't the expected size.")
        return

    edxStore = bytes.fromhex("67880280C43067886201")    # mov [edx], al / add ah, 30h / mov [edx+1], ah
    nopPadding = b"\x90" * 7                            # keeps the following code at its original offset
    saveTail = bytes.fromhex("B43C")                    # mov ah, 3Ch
    loadTail = bytes.fromhex("B8023D")                  # mov ax, 3D02h
    driverCredit = b"Dos driver by Mok.\x00"

    # (original bytes, patched bytes, message when patched, message when already patched), applied in order.
    patches = (
        (edxStore + saveTail, bytes.fromhex("E87C01") + nopPadding + saveTail,
         "Patched save routine upper register data issue",
         "Save routine upper register data issue already patched"),
        (edxStore + loadTail, bytes.fromhex("E83401") + nopPadding + loadTail,
         "Patched load routine upper register data issue",
         "Load routine upper register data issue already patched"),
        (driverCredit, driverCredit + fixFunction,
         "Added fix function for load/save routines",
         "Fix function for load/save routines already added"),
    )

    # Work on the raw bytes rather than on a hex dump of them: a hex-string search can start in the middle
    # of a byte and rewrite unrelated data.
    updatedFile = False
    for originalBytes, patchedBytes, patchedMessage, alreadyPatchedMessage in patches:
        if patchedBytes in fileData:
            print(alreadyPatchedMessage)
        elif originalBytes in fileData:
            fileData = fileData.replace(originalBytes, patchedBytes)
            updatedFile = True
            print(patchedMessage)

    # Write the modified data back if we changed it, creating a backup file if none exists.
    if not updatedFile:
        return

    backupFilePath = filePath.with_suffix(filePath.suffix[:-1] + "_")
    if not backupFilePath.is_file():
        shutil.copy2(filePath, backupFilePath)

    with open(filePath, 'wb') as outputFile:
        outputFile.write(fileData)

# Get the search path from the command line if supplied, otherwise default to the current dir.
# sys.argv[0] is the script itself, so the first user-supplied argument is sys.argv[1].
searchPath = sys.argv[1] if len(sys.argv) > 1 else "."

# Find and patch the dat file in the target path (searched recursively)
for filePath in Path(searchPath).rglob("HOBBIT.DAT"):
    patchDatFileIfRequired(filePath)

# Find and patch the com file in the target path (searched recursively)
for filePath in Path(searchPath).rglob("HOBBIT.COM"):
    patchComFileIfRequired(filePath)
