Try to speed up SD card accesses; theoretically this should be a 10x to 16x speedup

This commit is contained in:
meepingsnesroms
2019-05-11 13:34:07 -07:00
parent 8482c98010
commit 06acab1345
10 changed files with 211 additions and 127 deletions

View File

@@ -1,4 +1,4 @@
Cinco De Mayo(May 5th), 17:00 GMT, hopefully with some good ARM stuff
Cinco De Mayo(May 5th), 17:00 GMT, hopefully with some good ARM stuff(delayed again)
Easter release, 17:00 GMT, hopefully with some good ARM stuff(delayed)
Done:

View File

@@ -1168,7 +1168,7 @@ void dbvzLoadBootloader(uint8_t* data, uint32_t size){
if(!data)
size = 0;
size = u32Min(size, DBVZ_BOOTLOADER_SIZE);
size = uintMin(size, DBVZ_BOOTLOADER_SIZE);
//copy size bytes from buffer to bootloader area
for(index = 0; index < size; index++)

View File

@@ -107,7 +107,7 @@ int32_t pwm1FifoRunSample(int32_t now, int32_t clockOffset){
//try to get next sample, if none are available play old sample
if(pwm1FifoEntrys() > 0)
pwm1ReadPosition = (pwm1ReadPosition + 1) % 6;
dutyCycle = fMin((float)pwm1Fifo[pwm1ReadPosition] / period, 1.00);
dutyCycle = floatMin((float)pwm1Fifo[pwm1ReadPosition] / period, 1.00);
for(index = 0; index < repeat; index++){
#if !defined(EMU_NO_SAFETY)
@@ -383,19 +383,22 @@ static void setSpiCont1(uint16_t value){
if(value & oldSpiCont1 & 0x0200 && value & 0x0100){
while(spi1TxFifoEntrys() > 0){
uint16_t currentTxFifoEntry = spi1TxFifoRead();
uint16_t newRxFifoEntry = 0x0000;
uint16_t newRxFifoEntry;// = 0x0000;
uint8_t bitCount = (value & 0x000F) + 1;
uint16_t startBit = 1 << (bitCount - 1);
uint8_t bits;
//uint16_t startBit = 1 << (bitCount - 1);
//uint8_t bits;
//debugLog("SPI1 transfer, bitCount:%d, PC:0x%08X\n", bitCount, flx68000GetPc());
//The most significant bit is output when the CPU loads the transmitted data, 13.2.3 SPI 1 Phase and Polarity Configurations MC68VZ328UM.pdf
/*
for(bits = 0; bits < bitCount; bits++){
newRxFifoEntry <<= 1;
newRxFifoEntry |= sdCardExchangeBit(!!(currentTxFifoEntry & startBit));
currentTxFifoEntry <<= 1;
}
*/
newRxFifoEntry = sdCardExchangeXBitsOptimized(currentTxFifoEntry, bitCount);
//add received data back to RX FIFO
spi1RxFifoWrite(newRxFifoEntry);

View File

@@ -111,7 +111,7 @@ uint32_t emulatorInit(buffer_t palmRomDump, buffer_t palmBootDump, uint32_t enab
pxa255Deinit();
return EMU_ERROR_OUT_OF_MEMORY;
}
memcpy(palmRom, palmRomDump.data, u32Min(palmRomDump.size, TUNGSTEN_C_ROM_SIZE));
memcpy(palmRom, palmRomDump.data, uintMin(palmRomDump.size, TUNGSTEN_C_ROM_SIZE));
if(palmRomDump.size < TUNGSTEN_C_ROM_SIZE)
memset(palmRom + palmRomDump.size, 0x00, TUNGSTEN_C_ROM_SIZE - palmRomDump.size);
memset(palmRam, 0x00, TUNGSTEN_C_RAM_SIZE);
@@ -155,7 +155,7 @@ uint32_t emulatorInit(buffer_t palmRomDump, buffer_t palmBootDump, uint32_t enab
}
//set default values
memcpy(palmRom, palmRomDump.data, u32Min(palmRomDump.size, M515_ROM_SIZE));
memcpy(palmRom, palmRomDump.data, uintMin(palmRomDump.size, M515_ROM_SIZE));
if(palmRomDump.size < M515_ROM_SIZE)
memset(palmRom + palmRomDump.size, 0x00, M515_ROM_SIZE - palmRomDump.size);
swap16BufferIfLittle(palmRom, M515_ROM_SIZE / sizeof(uint16_t));

View File

@@ -251,7 +251,7 @@ void flx68000Execute(void){
dbvzBeginClk32();
while(cyclesRemaining >= 1.0){
double sysclks = dMin(cyclesRemaining, DBVZ_SYSCLK_PRECISION);
double sysclks = floatMin(cyclesRemaining, DBVZ_SYSCLK_PRECISION);
int32_t cpuCycles = sysclks * pctlrCpuClockDivider * palmClockMultiplier;
if(cpuCycles > 0)

View File

@@ -33,6 +33,33 @@ static inline void swap16BufferIfBig(uint8_t* buffer, uint32_t count){
#endif
}
//custom operators
//number of bits in a value or type, assumes 8 bit bytes
#define SIZEOF_BITS(value) (sizeof(value) * 8)

//all helpers below treat a count of 0 as "change nothing" and a count >= the width of uintmax_t as "affect every bit",
//shifting by the full width of the type is undefined in C so those cases are handled explicitly

//returns value with its lowest "count" bits cleared
static inline uintmax_t fillBottomWith0s(uintmax_t value, uint8_t count){
   if(count >= SIZEOF_BITS(uintmax_t))
      return 0;
   return value & UINTMAX_MAX << count;
}

//returns value with its highest "count" bits cleared
static inline uintmax_t fillTopWith0s(uintmax_t value, uint8_t count){
   if(count >= SIZEOF_BITS(uintmax_t))
      return 0;
   return value & UINTMAX_MAX >> count;
}

//returns value with its lowest "count" bits set
static inline uintmax_t fillBottomWith1s(uintmax_t value, uint8_t count){
   if(count == 0)
      return value;
   if(count >= SIZEOF_BITS(uintmax_t))
      return UINTMAX_MAX;
   return value | UINTMAX_MAX >> (SIZEOF_BITS(uintmax_t) - count);
}

//returns value with its highest "count" bits set
static inline uintmax_t fillTopWith1s(uintmax_t value, uint8_t count){
   if(count == 0)
      return value;
   if(count >= SIZEOF_BITS(uintmax_t))
      return UINTMAX_MAX;
   return value | UINTMAX_MAX << (SIZEOF_BITS(uintmax_t) - count);
}

//left shift that shifts in 1s instead of 0s
static inline uintmax_t leftShiftUse1s(uintmax_t value, uint8_t count){
   if(count >= SIZEOF_BITS(uintmax_t))
      return UINTMAX_MAX;
   return fillBottomWith1s(value << count, count);
}

//right shift that shifts in 1s instead of 0s
static inline uintmax_t rightShiftUse1s(uintmax_t value, uint8_t count){
   if(count >= SIZEOF_BITS(uintmax_t))
      return UINTMAX_MAX;
   return fillTopWith1s(value >> count, count);
}
//threads
#if defined(EMU_MULTITHREADED)
#define PRAGMA_STRINGIFY(x) _Pragma(#x)
@@ -44,134 +71,43 @@ static inline void swap16BufferIfBig(uint8_t* buffer, uint32_t count){
#endif
//range capping
static inline uint8_t u8Min(uint8_t x, uint8_t y){
static inline uintmax_t uintMin(uintmax_t x, uintmax_t y){
return x < y ? x : y;
}
static inline uint8_t u8Max(uint8_t x, uint8_t y){
static inline uintmax_t uintMax(uintmax_t x, uintmax_t y){
return x > y ? x : y;
}
static inline uint8_t u8Clamp(uint8_t low, uint8_t value, uint8_t high){
static inline uintmax_t uintClamp(uintmax_t low, uintmax_t value, uintmax_t high){
//low must always be less than high!
return u8Max(low, u8Min(value, high));
return uintMax(low, uintMin(value, high));
}
static inline uint16_t u16Min(uint16_t x, uint16_t y){
static inline intmax_t intMin(intmax_t x, intmax_t y){
return x < y ? x : y;
}
static inline uint16_t u16Max(uint16_t x, uint16_t y){
static inline intmax_t intMax(intmax_t x, intmax_t y){
return x > y ? x : y;
}
static inline uint16_t u16Clamp(uint16_t low, uint16_t value, uint16_t high){
static inline intmax_t intClamp(intmax_t low, intmax_t value, intmax_t high){
//low must always be less than high!
return u16Max(low, u16Min(value, high));
return intMax(low, intMin(value, high));
}
static inline uint32_t u32Min(uint32_t x, uint32_t y){
static inline double floatMin(double x, double y){
return x < y ? x : y;
}
static inline uint32_t u32Max(uint32_t x, uint32_t y){
static inline double floatMax(double x, double y){
return x > y ? x : y;
}
static inline uint32_t u32Clamp(uint32_t low, uint32_t value, uint32_t high){
static inline double floatClamp(double low, double value, double high){
//low must always be less than high!
return u32Max(low, u32Min(value, high));
}
static inline uint64_t u64Min(uint64_t x, uint64_t y){
return x < y ? x : y;
}
static inline uint64_t u64Max(uint64_t x, uint64_t y){
return x > y ? x : y;
}
static inline uint64_t u64Clamp(uint64_t low, uint64_t value, uint64_t high){
//low must always be less than high!
return u64Max(low, u64Min(value, high));
}
static inline int8_t s8Min(int8_t x, int8_t y){
return x < y ? x : y;
}
static inline int8_t s8Max(int8_t x, int8_t y){
return x > y ? x : y;
}
static inline int8_t s8Clamp(int8_t low, int8_t value, int8_t high){
//low must always be less than high!
return s8Max(low, s8Min(value, high));
}
static inline int16_t s16Min(int16_t x, int16_t y){
return x < y ? x : y;
}
static inline int16_t s16Max(int16_t x, int16_t y){
return x > y ? x : y;
}
static inline int16_t s16Clamp(int16_t low, int16_t value, int16_t high){
//low must always be less than high!
return s16Max(low, s16Min(value, high));
}
static inline int32_t s32Min(int32_t x, int32_t y){
return x < y ? x : y;
}
static inline int32_t s32Max(int32_t x, int32_t y){
return x > y ? x : y;
}
static inline int32_t s32Clamp(int32_t low, int32_t value, int32_t high){
//low must always be less than high!
return s32Max(low, s32Min(value, high));
}
static inline int64_t s64Min(int64_t x, int64_t y){
return x < y ? x : y;
}
static inline int64_t s64Max(int64_t x, int64_t y){
return x > y ? x : y;
}
static inline int64_t s64Clamp(int64_t low, int64_t value, int64_t high){
//low must always be less than high!
return s64Max(low, s64Min(value, high));
}
static inline float fMin(float x, float y){
return x < y ? x : y;
}
static inline float fMax(float x, float y){
return x > y ? x : y;
}
static inline float fClamp(float low, float value, float high){
//low must always be less than high!
return fMax(low, fMin(value, high));
}
static inline double dMin(double x, double y){
return x < y ? x : y;
}
static inline double dMax(double x, double y){
return x > y ? x : y;
}
static inline double dClamp(double low, double value, double high){
//low must always be less than high!
return dMax(low, dMin(value, high));
return floatMax(low, floatMin(value, high));
}
//float platform safety

View File

@@ -3,6 +3,7 @@
#include <string.h>
#include "emulator.h"
#include "portability.h"
#include "specs/sdCardCommandSpec.h"
@@ -59,6 +60,21 @@ static uint16_t sdCardCrc16(uint8_t* data, uint16_t size){
#include "sdCardAccessors.c.h"
static void sdCardTopOffReadBuffer(void){
   //queues more read data once the response FIFO holds less than one block
   //only call during a multi block read / palmSdCard.runningCommand == READ_MULTIPLE_BLOCK

   //still at least one full block buffered, nothing to do yet
   if(sdCardResponseFifoByteEntrys() >= SD_CARD_BLOCK_SIZE)
      return;

   sdCardDoResponseDelay(1);

   if(palmSdCard.runningCommandVars[0] >= palmSdCard.flashChip.size){
      //the read offset has reached the end of the flash chip data, send an error token and stop the running command
      sdCardDoResponseErrorToken(ET_OUT_OF_RANGE);
      palmSdCard.runningCommand = 0x00;
      return;
   }

   //send the block at the current read offset, then advance the offset to the next block
   sdCardDoResponseDataPacket(DATA_TOKEN_DEFAULT, palmSdCard.flashChip.data + palmSdCard.runningCommandVars[0], SD_CARD_BLOCK_SIZE);
   palmSdCard.runningCommandVars[0] += SD_CARD_BLOCK_SIZE;
}
void sdCardReset(void){
if(palmSdCard.flashChip.data){
palmSdCard.command = UINT64_C(0x0000000000000000);
@@ -100,19 +116,8 @@ bool sdCardExchangeBit(bool bit){
outputValue = sdCardResponseFifoReadBit();
//if doing a multiblock read add data when running low
if(palmSdCard.runningCommand == READ_MULTIPLE_BLOCK){
if(sdCardResponseFifoByteEntrys() < SD_CARD_BLOCK_SIZE){
sdCardDoResponseDelay(1);
if(palmSdCard.runningCommandVars[0] < palmSdCard.flashChip.size){
sdCardDoResponseDataPacket(DATA_TOKEN_DEFAULT, palmSdCard.flashChip.data + palmSdCard.runningCommandVars[0], SD_CARD_BLOCK_SIZE);
palmSdCard.runningCommandVars[0] += SD_CARD_BLOCK_SIZE;
}
else{
sdCardDoResponseErrorToken(ET_OUT_OF_RANGE);
palmSdCard.runningCommand = 0x00;
}
}
}
if(palmSdCard.runningCommand == READ_MULTIPLE_BLOCK)
sdCardTopOffReadBuffer();
//route received bit as command or data
if(palmSdCard.receivingCommand){
@@ -458,3 +463,107 @@ bool sdCardExchangeBit(bool bit){
return outputValue;
}
static uint32_t sdCardExchangeXBitsUnoptimized(uint32_t bits, uint8_t size){
   //exchanges "size" bits(1<->32) with the SD card one bit at a time through sdCardExchangeBit
   //the bit at position size - 1 of "bits" is sent first, the bits received in exchange are returned with the first received bit ending up in the highest position
   uint32_t returnBits = 0x00000000;
   uint32_t mask;
   uint8_t count;

   //nothing to transfer, this also keeps the shift count below from going negative
   if(size == 0)
      return returnBits;

   //the constant must be unsigned, shifting a signed 1 into bit 31 is undefined
   mask = UINT32_C(1) << (size - 1);

   for(count = 0; count < size; count++){
      returnBits <<= 1;
      returnBits |= sdCardExchangeBit(!!(bits & mask));
      //move the next bit to send up into the mask position
      bits <<= 1;
   }

   return returnBits;
}
uint32_t sdCardExchangeXBitsOptimized(uint32_t bits, uint8_t size){
   //does the same as sdCardExchangeXBitsUnoptimized but skips any unneeded behavior for speed
   //"bits" holds the data to send in its lowest "size" bits, the return value holds the bits received in exchange
   //any case not explicitly recognized as safe falls back to the bit by bit transfer
   uint32_t returnBits = 0x00000000;
   //size 0 is handled here so the mask helper is never asked to shift by the full width of its type
   uint32_t all1s = size ? fillBottomWith1s(0, size) : 0x00000000;

   //clear unused bits that are passed
   bits &= all1s;

   if(palmSdCard.flashChip.data){
      //a full 48 command bits are still pending and the incoming bits are all 1s or all 0s, these bits are not added to the command
      bool ignoreCmdBits = palmSdCard.commandBitsRemaining == 48 && (bits == all1s || bits == 0x00000000);
      bool safeToOptimize = !palmSdCard.receivingCommand || ignoreCmdBits || (palmSdCard.commandBitsRemaining > 47 && palmSdCard.commandBitsRemaining - size < 1);

      if(safeToOptimize){
         //check for simple cases
         if(!palmSdCard.runningCommand || palmSdCard.runningCommand == READ_MULTIPLE_BLOCK){
            //nothing will happen until this transfer is over, do fast transfer and check if FIFO needs to be refilled
            if(!ignoreCmdBits){
               palmSdCard.command <<= size;
               palmSdCard.command |= bits;
               palmSdCard.commandBitsRemaining -= size;
            }

            //fill return FIFO if it's getting low
            if(palmSdCard.runningCommand == READ_MULTIPLE_BLOCK)
               sdCardTopOffReadBuffer();

            //whole byte sizes read one byte per case and deliberately fall through to the smaller sizes
            //the byte is widened to uint32_t before shifting, shifting the promoted int left by 24 is undefined for bytes >= 0x80
            switch(size){
               case 32:
                  returnBits |= (uint32_t)sdCardResponseFifoReadByteOptimized() << 24;
                  //fallthrough
               case 24:
                  returnBits |= (uint32_t)sdCardResponseFifoReadByteOptimized() << 16;
                  //fallthrough
               case 16:
                  returnBits |= (uint32_t)sdCardResponseFifoReadByteOptimized() << 8;
                  //fallthrough
               case 8:
                  returnBits |= sdCardResponseFifoReadByteOptimized();
                  break;

               default:{
                  //slow method
                  uint8_t count;

                  for(count = 0; count < size; count++){
                     returnBits <<= 1;
                     returnBits |= sdCardResponseFifoReadBit();
                  }
                  break;
               }
            }
         }
         else if(palmSdCard.runningCommand == WRITE_SINGLE_BLOCK || palmSdCard.runningCommand == WRITE_MULTIPLE_BLOCK){
            //just passthrough write data
            //runningCommandVars[2] is used as a bit position into runningCommandPacket
            uint32_t currentByte = palmSdCard.runningCommandVars[2] / 8;
            bool alignedProperly = size % 8 == 0 && palmSdCard.runningCommandVars[2] % 8 == 0;

            if(alignedProperly && currentByte > 0 && currentByte + size / 8 < SD_CARD_BLOCK_DATA_PACKET_SIZE - 1){
               //byte aligned in the middle of a data packet, can just copy data over
               uint8_t count;

               for(count = 0; count < size / 8; count++){
                  //store the bytes of "bits" from most significant to least significant
                  palmSdCard.runningCommandPacket[currentByte] = (bits >> ((size - 8) - (count * 8))) & 0xFF;
                  palmSdCard.runningCommandVars[2] += 8;
                  currentByte++;
                  returnBits <<= 8;
                  returnBits |= sdCardResponseFifoReadByteOptimized();
               }
            }
            else{
               //not write safe
               returnBits = sdCardExchangeXBitsUnoptimized(bits, size);
            }
         }
         else{
            //unknown condition
            returnBits = sdCardExchangeXBitsUnoptimized(bits, size);
         }
      }
      else{
         //not safe to optimize :(
         returnBits = sdCardExchangeXBitsUnoptimized(bits, size);
      }
   }
   else{
      //not connected, fill with 1s
      returnBits = all1s;
   }

   return returnBits;
}

View File

@@ -2,10 +2,12 @@
#define SD_CARD_H
#include <stdbool.h>
#include <stdint.h>
void sdCardReset(void);
void sdCardSetChipSelect(bool value);
bool sdCardExchangeBit(bool bit);
uint32_t sdCardExchangeXBitsOptimized(uint32_t bits, uint8_t size);
#endif

View File

@@ -19,6 +19,40 @@ static bool sdCardResponseFifoReadBit(void){
return true;
}
static uint8_t sdCardResponseFifoReadByteOptimized(void){
   //reads 8 bits from the response FIFO, taking whole bytes at once when more than 1 byte is buffered
   uint8_t result = 0x00;

   if(sdCardResponseFifoByteEntrys() <= 1){
      //not enough bytes left, use slow accurate method
      uint8_t bitIndex;

      for(bitIndex = 0; bitIndex < 8; bitIndex++)
         result = result << 1 | sdCardResponseFifoReadBit();
      return result;
   }

   if(palmSdCard.responseReadPositionBit == 7){
      //read position is at the top bit of a byte, just take the whole byte at once
      result = palmSdCard.responseFifo[palmSdCard.responseReadPosition];
      palmSdCard.responseReadPosition = (palmSdCard.responseReadPosition + 1) % SD_CARD_RESPONSE_FIFO_SIZE;
      return result;
   }

   //read position is inside a byte, have to merge 2 bytes
   //the unread low bits of the current byte become the high bits of the result
   result = palmSdCard.responseFifo[palmSdCard.responseReadPosition] << (7 - palmSdCard.responseReadPositionBit);
   palmSdCard.responseReadPosition = (palmSdCard.responseReadPosition + 1) % SD_CARD_RESPONSE_FIFO_SIZE;
   //the high bits of the next byte fill in the rest, the bit position itself stays the same
   result |= palmSdCard.responseFifo[palmSdCard.responseReadPosition] >> (palmSdCard.responseReadPositionBit + 1);
   return result;
}
static void sdCardResponseFifoWriteByte(uint8_t value){
if(sdCardResponseFifoByteEntrys() < SD_CARD_RESPONSE_FIFO_SIZE - 1){
palmSdCard.responseFifo[palmSdCard.responseWritePosition] = value;

View File

@@ -391,8 +391,8 @@ void sed1376Render(void){
//debugLog("PIP state, start x:%d, end x:%d, start y:%d, end y:%d\n", pipStartX, pipEndX, pipStartY, pipEndY);
//render PIP only if PIP window is onscreen
if(pipStartX < 160 && pipStartY < 160){
pipEndX = u16Min(pipEndX, 160);
pipEndY = u16Min(pipEndY, 160);
pipEndX = uintMin(pipEndX, 160);
pipEndY = uintMin(pipEndY, 160);
screenStartAddress = getPipStartAddress();
lineSize = (sed1376Registers[PIP_LINE_SZ_1] << 8 | sed1376Registers[PIP_LINE_SZ_0]) * 4;
MULTITHREAD_DOUBLE_LOOP(pixelX, pixelY) for(pixelY = pipStartY; pixelY < pipEndY; pixelY++)