diff --git a/hb/bootloader/include/patch.h b/hb/bootloader/include/patch.h index 79a200b26..55b2b4d8c 100644 --- a/hb/bootloader/include/patch.h +++ b/hb/bootloader/include/patch.h @@ -4,11 +4,15 @@ #include #include // tNDSHeader +#define patchOffsetCacheFileVersion 4 // Change when new functions are being patched, some offsets removed + // the offset order changed, and/or the function signatures changed typedef struct patchOffsetCacheContents { u16 ver; u16 type; u32 dldiOffset; u32 dldiChecked; + u32* mpuRegionOffset; + u32 mpuRegionChecked; u32* wordCommandOffset; u32* bootloaderOffset; u32 bootloaderChecked; @@ -20,9 +24,10 @@ typedef struct patchOffsetCacheContents { u32 swi00Checked; } patchOffsetCacheContents; -extern u16 patchOffsetCacheFileVersion; +extern u16 patchOffsetCacheFilePrevCrc; +extern u16 patchOffsetCacheFileNewCrc; + extern patchOffsetCacheContents patchOffsetCache; -extern bool patchOffsetCacheChanged; extern void rsetPatchCache(const tNDSHeader* ndsHeader); extern void patchBinary(const tNDSHeader* ndsHeader); diff --git a/hb/bootloader/source/arm7/dldi_patcher.c b/hb/bootloader/source/arm7/dldi_patcher.c index e5b607752..f81258595 100644 --- a/hb/bootloader/source/arm7/dldi_patcher.c +++ b/hb/bootloader/source/arm7/dldi_patcher.c @@ -130,9 +130,7 @@ bool dldiPatchBinary (data_t *binData, u32 binSize, bool ramDisk) { // does not have a DLDI section return false; } else { - extern bool patchOffsetCacheChanged; patchOffsetCache.dldiOffset = (u32)patchOffset; - patchOffsetCacheChanged = true; } data_t *pDH = (data_t*)(((u32*)(&_io_dldi)) - 24); diff --git a/hb/bootloader/source/arm7/hook.c b/hb/bootloader/source/arm7/hook.c index 484f41f7a..2d8d9da63 100644 --- a/hb/bootloader/source/arm7/hook.c +++ b/hb/bootloader/source/arm7/hook.c @@ -128,6 +128,8 @@ static const u32 swi00Patched[3] = { 0x4770DF05 , // SWI 0X05 };*/ +static u32 mpuRegionSignature = 0x04000033; + //static const int MAX_HANDLER_SIZE = 50; static u32* hookInterruptHandlerHomebrew (u32* addr, size_t size) { @@ -198,6 +200,24 @@ static u32* hookAccelIPCHomebrew2010(u32* addr, size_t size) { return addr; } +static u32* hookMpu(u32* addr, size_t size) { + u32* end = addr + size/sizeof(u32); + + while (addr < end) { + if (*addr == mpuRegionSignature) + { + break; + } + addr++; + } + + if (addr >= end) { + return NULL; + } + + return addr; +} + static u16* hookSwi00(u16* addr, size_t size) { u16* end = addr + size/sizeof(u16); @@ -283,11 +303,25 @@ const u16* generateA7InstrThumb(int arg1, int arg2) { int hookNds (const tNDSHeader* ndsHeader, u32* sdEngineLocation, u32* wordCommandAddr) { u32* hookLocation = patchOffsetCache.a7IrqHookOffset; u32* hookAccel = patchOffsetCache.a7IrqHookAccelOffset; + u32* mpuRegionLocation = patchOffsetCache.mpuRegionOffset; u16* a9Swi12Location = patchOffsetCache.a9Swi12Offset; u16* swi00Location = patchOffsetCache.swi00Offset; nocashMessage("hookNds"); + if (patchOffsetCache.dldiOffset) { + if (!patchOffsetCache.mpuRegionChecked) { + mpuRegionLocation = hookMpu((u32*)ndsHeader->arm9destination, ndsHeader->arm9binarySize); + if (mpuRegionLocation) { + patchOffsetCache.mpuRegionOffset = mpuRegionLocation; + } + patchOffsetCache.mpuRegionChecked = true; + } + if (mpuRegionLocation) { + *mpuRegionLocation = 0x00000035; // Patch MPU to allow DSi WRAM access + } + } + if (!patchOffsetCache.a9Swi12Checked) { a9Swi12Location = hookSwi12((u16*)ndsHeader->arm9destination, ndsHeader->arm9binarySize); if (a9Swi12Location) { diff --git a/hb/bootloader/source/arm7/main.arm7.c b/hb/bootloader/source/arm7/main.arm7.c index c6e7e5940..d13926358 100644 --- a/hb/bootloader/source/arm7/main.arm7.c +++ b/hb/bootloader/source/arm7/main.arm7.c @@ -180,19 +180,19 @@ static void passArgs_ARM7 (void) { static void initMBK(void) { // This function has no effect with ARM7 SCFG locked - // arm7 is master of WRAM-A, arm9 of WRAM-B & C - REG_MBK9=0x3000000F; + // ARM7 is master of WRAM-A, arm9 of WRAM-B & C + REG_MBK9 = 0x3000000F; - // WRAM-A fully mapped to arm7 - *((vu32*)REG_MBK1)=0x8D898581; // same as dsiware + // WRAM-A fully mapped to ARM7 + *(vu32*)REG_MBK1 = 0x8185898D; // Same as DSiWare - // WRAM-B fully mapped to arm9 // inverted order - *((vu32*)REG_MBK2)=0x8C888480; - *((vu32*)REG_MBK3)=0x9C989490; + // WRAM-B fully mapped to ARM7 // inverted order + *(vu32*)REG_MBK2 = 0x9195999D; + *(vu32*)REG_MBK3 = 0x8185898D; - // WRAM-C fully mapped to arm9 // inverted order - *((vu32*)REG_MBK4)=0x8C888480; - *((vu32*)REG_MBK5)=0x9C989490; + // WRAM-C fully mapped to arm7 // inverted order + *(vu32*)REG_MBK4 = 0x9195999D; + *(vu32*)REG_MBK5 = 0x8185898D; // WRAM mapped to the 0x3700000 - 0x37FFFFF area // WRAM-A mapped to the 0x3000000 - 0x303FFFF area : 256k @@ -697,8 +697,16 @@ int arm7_main (void) { // File containing cached patch offsets aFile patchOffsetCacheFile = getFileFromCluster(patchOffsetCacheFileCluster); - fileRead((char*)&patchOffsetCache, patchOffsetCacheFile, 0, sizeof(patchOffsetCacheContents)); - u16 prevPatchOffsetCacheFileVersion = patchOffsetCache.ver; + fileRead((char*)&patchOffsetCache, patchOffsetCacheFile, 0, 4); + if (patchOffsetCache.ver == patchOffsetCacheFileVersion + && patchOffsetCache.type == 2) { // 0 = Regular, 1 = B4DS, 2 = HB + fileRead((char*)&patchOffsetCache, patchOffsetCacheFile, 0, sizeof(patchOffsetCacheContents)); + } else { + patchOffsetCache.ver = patchOffsetCacheFileVersion; + patchOffsetCache.type = 2; + } + + patchOffsetCacheFilePrevCrc = swiCRC16(0xFFFF, &patchOffsetCache, sizeof(patchOffsetCacheContents)); rsetPatchCache(ndsHeader); @@ -725,7 +733,6 @@ int arm7_main (void) { patchOffsetCache.dldiOffset = patchOffset; } patchOffsetCache.dldiChecked = true; - patchOffsetCacheChanged = true; } u32* wordCommandAddr = (u32 *) (((u32)((u32*)NDS_HEADER)[0x0A])+patchOffset+0x80); @@ -747,7 +754,6 @@ int arm7_main (void) { } } patchOffsetCache.bootloaderChecked = true; - patchOffsetCacheChanged = true; } if (patchOffsetCache.bootloaderOffset) { //toncset(patchOffsetCache.bootloaderOffset, 0, 0x9C98); @@ -759,7 +765,8 @@ int arm7_main (void) { } toncset((char*)0x06000000, 0, 0x8000); - if (prevPatchOffsetCacheFileVersion != patchOffsetCacheFileVersion || patchOffsetCacheChanged) { + patchOffsetCacheFileNewCrc = swiCRC16(0xFFFF, &patchOffsetCache, sizeof(patchOffsetCacheContents)); + if (patchOffsetCacheFileNewCrc != patchOffsetCacheFilePrevCrc) { fileWrite((char*)&patchOffsetCache, patchOffsetCacheFile, 0, sizeof(patchOffsetCacheContents)); } diff --git a/hb/bootloader/source/arm7/patch_common.c b/hb/bootloader/source/arm7/patch_common.c index a7f73f029..c8494aa64 100644 --- a/hb/bootloader/source/arm7/patch_common.c +++ b/hb/bootloader/source/arm7/patch_common.c @@ -23,13 +23,11 @@ #include "common.h" #include "tonccpy.h" -u16 patchOffsetCacheFileVersion = 3; // Change when new functions are being patched, some offsets removed - // the offset order changed, and/or the function signatures changed +u16 patchOffsetCacheFilePrevCrc = 0; +u16 patchOffsetCacheFileNewCrc = 0; patchOffsetCacheContents patchOffsetCache; -bool patchOffsetCacheChanged = false; - void rsetPatchCache(const tNDSHeader* ndsHeader) { if (patchOffsetCache.ver != patchOffsetCacheFileVersion diff --git a/hb/bootloader/source/arm9/main.arm9.c b/hb/bootloader/source/arm9/main.arm9.c index 87b5d6ce6..4157af71e 100644 --- a/hb/bootloader/source/arm9/main.arm9.c +++ b/hb/bootloader/source/arm9/main.arm9.c @@ -53,13 +53,13 @@ volatile u32 arm9_ramDiskCluster = 0; void initMBKARM9(void) { // Default DSiWare settings - // WRAM-B fully mapped to arm9 // inverted order - *((vu32*)REG_MBK2)=0x8C888480; - *((vu32*)REG_MBK3)=0x9C989490; + // WRAM-B fully mapped to arm7 // inverted order + *(vu32*)REG_MBK2 = 0x9195999D; + *(vu32*)REG_MBK3 = 0x8185898D; - // WRAM-C fully mapped to arm9 // inverted order - *((vu32*)REG_MBK4)=0x8C888480; - *((vu32*)REG_MBK5)=0x9C989490; + // WRAM-C fully mapped to arm7 // inverted order + *(vu32*)REG_MBK4 = 0x9195999D; + *(vu32*)REG_MBK5 = 0x8185898D; // WRAM-A not mapped (reserved to arm7) REG_MBK6=0x00000000; diff --git a/hb/common/include/locations.h b/hb/common/include/locations.h index b0c9de033..3af45ef16 100644 --- a/hb/common/include/locations.h +++ b/hb/common/include/locations.h @@ -25,6 +25,6 @@ #define RAM_DISK_LOCATION_LZ77ROM 0x0C900000 #define RAM_DISK_LOCATION_DSIMODE 0x0D000000 -#define CACHE_ADRESS_START 0x0C800000 +#define CACHE_ADRESS_START 0x03700000 #endif // LOCATIONS_H diff --git a/hb/dldi/source/iointerface.c b/hb/dldi/source/iointerface.c index 3762d5330..3f0f46b4e 100644 --- a/hb/dldi/source/iointerface.c +++ b/hb/dldi/source/iointerface.c @@ -24,10 +24,12 @@ //#define MAX_READ 53 #define cacheBlockSize 0x8000 -#define cacheSlots 0x800000/cacheBlockSize +#define cacheSlots 0x80000/cacheBlockSize #define BYTES_PER_READ 512 #define cacheBlockSectors (cacheBlockSize/BYTES_PER_READ) +#define REG_MBK_CACHE_START 0x4004044 + #ifndef NULL #define NULL 0 #endif @@ -73,15 +75,19 @@ extern u32 dataStartOffset; //extern u32* words_msg; // word_command_offset+8 u32 word_command_offset = 0; -// NOTE: The cache code isn't working properly for some reason -/*u32 cacheDescriptor[cacheSlots] = {0xFFFFFFFF}; +bool cacheEnabled = false; +u32 cacheDescriptor[cacheSlots] = {0xFFFFFFFF}; int cacheCounter[cacheSlots]; +int cacheAllocated = 0; int accessCounter = 0; int allocateCacheSlot(void) { + cacheAllocated++; + if (cacheAllocated > cacheSlots) cacheAllocated = cacheSlots; + int slot = 0; u32 lowerCounter = accessCounter; - for (int i = 0; i < cacheSlots; i++) { + for (int i = 0; i < cacheAllocated; i++) { if (cacheCounter[i] <= lowerCounter) { lowerCounter = cacheCounter[i]; slot = i; @@ -94,7 +100,7 @@ int allocateCacheSlot(void) { } int getSlotForSector(sec_t sector) { - for (int i = 0; i < cacheSlots; i++) { + for (int i = 0; i < cacheAllocated; i++) { if (cacheDescriptor[i] == sector) { return i; } @@ -109,9 +115,17 @@ vu8* getCacheAddress(int slot) { void updateDescriptor(int slot, sec_t sector) { cacheDescriptor[slot] = sector; cacheCounter[slot] = accessCounter; -}*/ +} + +void transferToArm7(int slot) { + *((vu8*)(REG_MBK_CACHE_START+slot)) |= 0x1; +} - // Use the dldi remaining space as temporary buffer : 28k usually available +void transferToArm9(int slot) { + *((vu8*)(REG_MBK_CACHE_START+slot)) &= 0xFE; +} + +// Use the dldi remaining space as temporary buffer : 28k usually available extern vu32* tmp_buf_addr; extern vu8 allocated_space; @@ -186,7 +200,52 @@ bool sd_ReadSectors(sec_t sector, sec_t numSectors,void* buffer) { //nocashMessage("sd_ReadSectors"); FifoMessage msg; int result = 0; - if ((u32)buffer < 0x02000000 || (u32)buffer >= 0x04000000) { + if (cacheEnabled) { + accessCounter++; + + while(numSectors > 0) { + const sec_t alignedSector = (sector/cacheBlockSectors)*cacheBlockSectors; + + int slot = getSlotForSector(alignedSector); + vu8* cacheBuffer = getCacheAddress(slot); + // Read max CACHE_READ_SIZE via the main RAM cache + if (slot == -1) { + slot = allocateCacheSlot(); + + cacheBuffer = getCacheAddress(slot); + + msg.type = SDMMC_SD_READ_SECTORS; + msg.sdParams.startsector = alignedSector; + msg.sdParams.numsectors = cacheBlockSectors; + msg.sdParams.buffer = (u32*)cacheBuffer; + + transferToArm7(15-slot); + sendMsg(sizeof(msg), (u8*)&msg); + waitValue32(); + transferToArm9(15-slot); + + result = getValue32(); + } + updateDescriptor(slot, alignedSector); + + sec_t len2 = numSectors; + if ((sector - alignedSector) + numSectors > cacheBlockSectors) { + len2 = alignedSector - sector + cacheBlockSectors; + } + + __aeabi_memcpy(buffer, (u8*)cacheBuffer+((sector-alignedSector)*BYTES_PER_READ), len2*BYTES_PER_READ); + + for (u32 i = 0; i < len2; i++) { + numSectors--; + if (numSectors == 0) break; + } + if (numSectors > 0) { + sector += len2; + buffer += len2*BYTES_PER_READ; + accessCounter++; + } + } + } else if ((u32)buffer < 0x02000000 || (u32)buffer >= 0x04000000) { sec_t startsector, readsectors; int max_reads = ((1 << allocated_space) / 512) - 11; @@ -224,56 +283,67 @@ bool sd_ReadSectors(sec_t sector, sec_t numSectors,void* buffer) { result = getValue32(); } - /*sec_t alignedSector = (sector/cacheBlockSectors)*cacheBlockSectors; + return result == 0; +} + +//--------------------------------------------------------------------------------- +bool sd_WriteSectors(sec_t sector, sec_t numSectors,void* buffer) { +//--------------------------------------------------------------------------------- + //nocashMessage("sd_ReadSectors"); + FifoMessage msg; + int result = 0; + + if (cacheEnabled) { + accessCounter++; - accessCounter++; + sec_t numSectorsBak = numSectors; - while(numSectors > 0) { - int slot = getSlotForSector(sector); - vu8* cacheBuffer = getCacheAddress(slot); - // Read max CACHE_READ_SIZE via the main RAM cache - if (slot == -1) { - slot = allocateCacheSlot(); + while(numSectors > 0) { + const sec_t alignedSector = (sector/cacheBlockSectors)*cacheBlockSectors; - cacheBuffer = getCacheAddress(slot); + int slot = getSlotForSector(alignedSector); + vu8* cacheBuffer = getCacheAddress(slot); + // Read max CACHE_READ_SIZE via the main RAM cache + if (slot == -1) { + slot = allocateCacheSlot(); - msg.type = SDMMC_SD_READ_SECTORS; - msg.sdParams.startsector = alignedSector; - msg.sdParams.numsectors = cacheBlockSectors; - msg.sdParams.buffer = (u32*)cacheBuffer; + cacheBuffer = getCacheAddress(slot); - sendMsg(sizeof(msg), (u8*)&msg); + msg.type = SDMMC_SD_READ_SECTORS; + msg.sdParams.startsector = alignedSector; + msg.sdParams.numsectors = cacheBlockSectors; + msg.sdParams.buffer = (u32*)cacheBuffer; - waitValue32(); + transferToArm7(15-slot); + sendMsg(sizeof(msg), (u8*)&msg); + waitValue32(); + transferToArm9(15-slot); - result = getValue32(); - } - updateDescriptor(slot, alignedSector); + result = getValue32(); + } + updateDescriptor(slot, alignedSector); - sec_t len2 = numSectors; - if ((sector - alignedSector) + len2 > cacheBlockSectors) { - len2 = alignedSector - sector + cacheBlockSectors; - } + sec_t len2 = numSectors; + if ((sector - alignedSector) + numSectors > cacheBlockSectors) { + len2 = alignedSector - sector + cacheBlockSectors; + } - __aeabi_memcpy(buffer, (u8*)cacheBuffer+((sector-alignedSector)*BYTES_PER_READ), len2*BYTES_PER_READ); - numSectors -= len2; - if (numSectors > 0) { - sector += len2; - buffer += len2*BYTES_PER_READ; - alignedSector = (sector/cacheBlockSectors)*cacheBlockSectors; - accessCounter++; + __aeabi_memcpy((u8*)cacheBuffer+((sector-alignedSector)*BYTES_PER_READ), buffer, len2*BYTES_PER_READ); + + for (u32 i = 0; i < len2; i++) { + numSectors--; + if (numSectors == 0) break; + } + if (numSectors > 0) { + sector += len2; + buffer += len2*BYTES_PER_READ; + accessCounter++; + } } - }*/ - return result == 0; -} + numSectors = numSectorsBak; + } -//--------------------------------------------------------------------------------- -bool sd_WriteSectors(sec_t sector, sec_t numSectors,void* buffer) { -//--------------------------------------------------------------------------------- - //nocashMessage("sd_ReadSectors"); - FifoMessage msg; - int result = 0; if ((u32)buffer < 0x02000000 || (u32)buffer >= 0x04000000) { sec_t startsector, readsectors; @@ -311,59 +381,6 @@ bool sd_WriteSectors(sec_t sector, sec_t numSectors,void* buffer) { result = getValue32(); } - /*sec_t alignedSector = (sector/cacheBlockSectors)*cacheBlockSectors; - - accessCounter++; - - while(numSectors > 0) { - int slot = getSlotForSector(sector); - vu8* cacheBuffer = getCacheAddress(slot); - // Read max CACHE_READ_SIZE via the main RAM cache - if (slot == -1) { - slot = allocateCacheSlot(); - - cacheBuffer = getCacheAddress(slot); - - msg.type = SDMMC_SD_READ_SECTORS; - msg.sdParams.startsector = alignedSector; - msg.sdParams.numsectors = cacheBlockSectors; - msg.sdParams.buffer = (u32*)cacheBuffer; - - sendMsg(sizeof(msg), (u8*)&msg); - - waitValue32(); - - result = getValue32(); - } - updateDescriptor(slot, alignedSector); - - sec_t len2 = numSectors; - if ((sector - alignedSector) + len2 > cacheBlockSectors) { - len2 = alignedSector - sector + cacheBlockSectors; - } - - __aeabi_memcpy((u8*)cacheBuffer+((sector-alignedSector)*BYTES_PER_READ), buffer, len2*BYTES_PER_READ); - - msg.type = SDMMC_SD_WRITE_SECTORS; - msg.sdParams.startsector = alignedSector; - msg.sdParams.numsectors = len2; - msg.sdParams.buffer = (u8*)cacheBuffer+((sector-alignedSector)*BYTES_PER_READ); - - sendMsg(sizeof(msg), (u8*)&msg); - - waitValue32(); - - result = getValue32(); - - numSectors -= len2; - if (numSectors > 0) { - sector += len2; - buffer += len2*BYTES_PER_READ; - alignedSector = (sector/cacheBlockSectors)*cacheBlockSectors; - accessCounter++; - } - }*/ - return result == 0; } @@ -434,8 +451,17 @@ bool startup(void) { sdmmc_init(); return SD_Init()==0; } else { + for (int i = 0; i < 16; i++) { + transferToArm9(i); + } + *(vu32*)0x03700000 = 0x4253444E; // 'NDSB' + if (*(vu32*)0x03700000 == 0x4253444E) { + *(vu32*)0x03708000 = 0x77777777; + cacheEnabled = (*(vu32*)0x03700000 != *(vu32*)0x03708000); + } + word_command_offset = dataStartOffset+0x80; - word_command_offset += dsiMode ? 0x0A000000 : 0x00400000; + word_command_offset += (REG_SCFG_EXT == 0x8307F100) ? 0x0A000000 : 0xC00000; return sd_Startup(); } } diff --git a/retail/bootloaderi/source/arm9/main.arm9.c b/retail/bootloaderi/source/arm9/main.arm9.c index 53b269d8d..9e978df93 100644 --- a/retail/bootloaderi/source/arm9/main.arm9.c +++ b/retail/bootloaderi/source/arm9/main.arm9.c @@ -75,11 +75,11 @@ void initMBKARM9(void) { // WRAM-B fully mapped to arm7 // inverted order *(vu32*)REG_MBK2 = 0x9195999D; *(vu32*)REG_MBK3 = 0x8185898D; - + // WRAM-C fully mapped to arm7 // inverted order *(vu32*)REG_MBK4 = 0x9195999D; *(vu32*)REG_MBK5 = 0x8185898D; - + // WRAM-A not mapped (reserved to arm7) REG_MBK6 = 0x00000000; // WRAM-B mapped to the 0x3740000 - 0x37BFFFF area : 512k // why? only 256k real memory is there