mirror of
https://github.com/libretro/Mu.git
synced 2026-02-04 05:35:13 +00:00
Try to speed up SD card accesses; theoretically this should be a 10-16x speedup
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
Cinco De Mayo(May 5th), 17:00 GMT, hopefully with some good ARM stuff
|
||||
Cinco De Mayo(May 5th), 17:00 GMT, hopefully with some good ARM stuff(delayed again)
|
||||
Easter release, 17:00 GMT, hopefully with some good ARM stuff(delayed)
|
||||
|
||||
Done:
|
||||
|
||||
@@ -1168,7 +1168,7 @@ void dbvzLoadBootloader(uint8_t* data, uint32_t size){
|
||||
if(!data)
|
||||
size = 0;
|
||||
|
||||
size = u32Min(size, DBVZ_BOOTLOADER_SIZE);
|
||||
size = uintMin(size, DBVZ_BOOTLOADER_SIZE);
|
||||
|
||||
//copy size bytes from buffer to bootloader area
|
||||
for(index = 0; index < size; index++)
|
||||
|
||||
@@ -107,7 +107,7 @@ int32_t pwm1FifoRunSample(int32_t now, int32_t clockOffset){
|
||||
//try to get next sample, if none are available play old sample
|
||||
if(pwm1FifoEntrys() > 0)
|
||||
pwm1ReadPosition = (pwm1ReadPosition + 1) % 6;
|
||||
dutyCycle = fMin((float)pwm1Fifo[pwm1ReadPosition] / period, 1.00);
|
||||
dutyCycle = floatMin((float)pwm1Fifo[pwm1ReadPosition] / period, 1.00);
|
||||
|
||||
for(index = 0; index < repeat; index++){
|
||||
#if !defined(EMU_NO_SAFETY)
|
||||
@@ -383,19 +383,22 @@ static void setSpiCont1(uint16_t value){
|
||||
if(value & oldSpiCont1 & 0x0200 && value & 0x0100){
|
||||
while(spi1TxFifoEntrys() > 0){
|
||||
uint16_t currentTxFifoEntry = spi1TxFifoRead();
|
||||
uint16_t newRxFifoEntry = 0x0000;
|
||||
uint16_t newRxFifoEntry;// = 0x0000;
|
||||
uint8_t bitCount = (value & 0x000F) + 1;
|
||||
uint16_t startBit = 1 << (bitCount - 1);
|
||||
uint8_t bits;
|
||||
//uint16_t startBit = 1 << (bitCount - 1);
|
||||
//uint8_t bits;
|
||||
|
||||
//debugLog("SPI1 transfer, bitCount:%d, PC:0x%08X\n", bitCount, flx68000GetPc());
|
||||
|
||||
//The most significant bit is output when the CPU loads the transmitted data, 13.2.3 SPI 1 Phase and Polarity Configurations MC68VZ328UM.pdf
|
||||
/*
|
||||
for(bits = 0; bits < bitCount; bits++){
|
||||
newRxFifoEntry <<= 1;
|
||||
newRxFifoEntry |= sdCardExchangeBit(!!(currentTxFifoEntry & startBit));
|
||||
currentTxFifoEntry <<= 1;
|
||||
}
|
||||
*/
|
||||
newRxFifoEntry = sdCardExchangeXBitsOptimized(currentTxFifoEntry, bitCount);
|
||||
|
||||
//add received data back to RX FIFO
|
||||
spi1RxFifoWrite(newRxFifoEntry);
|
||||
|
||||
@@ -111,7 +111,7 @@ uint32_t emulatorInit(buffer_t palmRomDump, buffer_t palmBootDump, uint32_t enab
|
||||
pxa255Deinit();
|
||||
return EMU_ERROR_OUT_OF_MEMORY;
|
||||
}
|
||||
memcpy(palmRom, palmRomDump.data, u32Min(palmRomDump.size, TUNGSTEN_C_ROM_SIZE));
|
||||
memcpy(palmRom, palmRomDump.data, uintMin(palmRomDump.size, TUNGSTEN_C_ROM_SIZE));
|
||||
if(palmRomDump.size < TUNGSTEN_C_ROM_SIZE)
|
||||
memset(palmRom + palmRomDump.size, 0x00, TUNGSTEN_C_ROM_SIZE - palmRomDump.size);
|
||||
memset(palmRam, 0x00, TUNGSTEN_C_RAM_SIZE);
|
||||
@@ -155,7 +155,7 @@ uint32_t emulatorInit(buffer_t palmRomDump, buffer_t palmBootDump, uint32_t enab
|
||||
}
|
||||
|
||||
//set default values
|
||||
memcpy(palmRom, palmRomDump.data, u32Min(palmRomDump.size, M515_ROM_SIZE));
|
||||
memcpy(palmRom, palmRomDump.data, uintMin(palmRomDump.size, M515_ROM_SIZE));
|
||||
if(palmRomDump.size < M515_ROM_SIZE)
|
||||
memset(palmRom + palmRomDump.size, 0x00, M515_ROM_SIZE - palmRomDump.size);
|
||||
swap16BufferIfLittle(palmRom, M515_ROM_SIZE / sizeof(uint16_t));
|
||||
|
||||
@@ -251,7 +251,7 @@ void flx68000Execute(void){
|
||||
dbvzBeginClk32();
|
||||
|
||||
while(cyclesRemaining >= 1.0){
|
||||
double sysclks = dMin(cyclesRemaining, DBVZ_SYSCLK_PRECISION);
|
||||
double sysclks = floatMin(cyclesRemaining, DBVZ_SYSCLK_PRECISION);
|
||||
int32_t cpuCycles = sysclks * pctlrCpuClockDivider * palmClockMultiplier;
|
||||
|
||||
if(cpuCycles > 0)
|
||||
|
||||
@@ -33,6 +33,33 @@ static inline void swap16BufferIfBig(uint8_t* buffer, uint32_t count){
|
||||
#endif
|
||||
}
|
||||
|
||||
//custom operators
|
||||
//number of bits in a value or type, computed as its sizeof times 8 (assumes 8 bit bytes)
#define SIZEOF_BITS(value) (sizeof(value) * 8)
|
||||
|
||||
//Clears the lowest "count" bits of "value" and returns the result.
//A count equal to or larger than the width of uintmax_t clears every bit,
//shifting by the full width would be undefined behavior so it is handled explicitly.
static inline uintmax_t fillBottomWith0s(uintmax_t value, uint8_t count){
   if(count >= sizeof(uintmax_t) * 8)
      return 0;

   return value & (UINTMAX_MAX << count);
}
|
||||
|
||||
//Clears the highest "count" bits of "value" and returns the result.
//A count equal to or larger than the width of uintmax_t clears every bit,
//shifting by the full width would be undefined behavior so it is handled explicitly.
static inline uintmax_t fillTopWith0s(uintmax_t value, uint8_t count){
   if(count >= sizeof(uintmax_t) * 8)
      return 0;

   return value & (UINTMAX_MAX >> count);
}
|
||||
|
||||
//Sets the lowest "count" bits of "value" and returns the result.
//count == 0 returns value unchanged and a count equal to or larger than the width of
//uintmax_t sets every bit, both cases would otherwise need an out of range shift.
static inline uintmax_t fillBottomWith1s(uintmax_t value, uint8_t count){
   if(count == 0)
      return value;
   if(count >= sizeof(uintmax_t) * 8)
      return UINTMAX_MAX;

   return value | (UINTMAX_MAX >> (sizeof(uintmax_t) * 8 - count));
}
|
||||
|
||||
//Sets the highest "count" bits of "value" and returns the result.
//count == 0 returns value unchanged and a count equal to or larger than the width of
//uintmax_t sets every bit, both cases would otherwise need an out of range shift.
static inline uintmax_t fillTopWith1s(uintmax_t value, uint8_t count){
   if(count == 0)
      return value;
   if(count >= sizeof(uintmax_t) * 8)
      return UINTMAX_MAX;

   return value | (UINTMAX_MAX << (sizeof(uintmax_t) * 8 - count));
}
|
||||
|
||||
//Shifts "value" left by "count" bits, the vacated low bits are filled with 1s instead of 0s.
//count == 0 returns value unchanged, a count equal to or larger than the width of uintmax_t
//shifts everything out and leaves only 1s, neither case may reach the shift operators.
static inline uintmax_t leftShiftUse1s(uintmax_t value, uint8_t count){
   if(count == 0)
      return value;
   if(count >= sizeof(uintmax_t) * 8)
      return UINTMAX_MAX;

   //the mask has exactly the low "count" bits set
   return (value << count) | (UINTMAX_MAX >> (sizeof(uintmax_t) * 8 - count));
}
|
||||
|
||||
//Shifts "value" right by "count" bits, the vacated high bits are filled with 1s instead of 0s.
//count == 0 returns value unchanged, a count equal to or larger than the width of uintmax_t
//shifts everything out and leaves only 1s, neither case may reach the shift operators.
static inline uintmax_t rightShiftUse1s(uintmax_t value, uint8_t count){
   if(count == 0)
      return value;
   if(count >= sizeof(uintmax_t) * 8)
      return UINTMAX_MAX;

   //the mask has exactly the high "count" bits set
   return (value >> count) | (UINTMAX_MAX << (sizeof(uintmax_t) * 8 - count));
}
|
||||
|
||||
//threads
|
||||
#if defined(EMU_MULTITHREADED)
|
||||
#define PRAGMA_STRINGIFY(x) _Pragma(#x)
|
||||
@@ -44,134 +71,43 @@ static inline void swap16BufferIfBig(uint8_t* buffer, uint32_t count){
|
||||
#endif
|
||||
|
||||
//range capping
|
||||
static inline uint8_t u8Min(uint8_t x, uint8_t y){
|
||||
static inline uintmax_t uintMin(uintmax_t x, uintmax_t y){
|
||||
return x < y ? x : y;
|
||||
}
|
||||
|
||||
static inline uint8_t u8Max(uint8_t x, uint8_t y){
|
||||
static inline uintmax_t uintMax(uintmax_t x, uintmax_t y){
|
||||
return x > y ? x : y;
|
||||
}
|
||||
|
||||
static inline uint8_t u8Clamp(uint8_t low, uint8_t value, uint8_t high){
|
||||
static inline uintmax_t uintClamp(uintmax_t low, uintmax_t value, uintmax_t high){
|
||||
//low must always be less than high!
|
||||
return u8Max(low, u8Min(value, high));
|
||||
return uintMax(low, uintMin(value, high));
|
||||
}
|
||||
|
||||
static inline uint16_t u16Min(uint16_t x, uint16_t y){
|
||||
static inline intmax_t intMin(intmax_t x, intmax_t y){
|
||||
return x < y ? x : y;
|
||||
}
|
||||
|
||||
static inline uint16_t u16Max(uint16_t x, uint16_t y){
|
||||
static inline intmax_t intMax(intmax_t x, intmax_t y){
|
||||
return x > y ? x : y;
|
||||
}
|
||||
|
||||
static inline uint16_t u16Clamp(uint16_t low, uint16_t value, uint16_t high){
|
||||
static inline intmax_t intClamp(intmax_t low, intmax_t value, intmax_t high){
|
||||
//low must always be less than high!
|
||||
return u16Max(low, u16Min(value, high));
|
||||
return intMax(low, intMin(value, high));
|
||||
}
|
||||
|
||||
static inline uint32_t u32Min(uint32_t x, uint32_t y){
|
||||
static inline double floatMin(double x, double y){
|
||||
return x < y ? x : y;
|
||||
}
|
||||
|
||||
static inline uint32_t u32Max(uint32_t x, uint32_t y){
|
||||
static inline double floatMax(double x, double y){
|
||||
return x > y ? x : y;
|
||||
}
|
||||
|
||||
static inline uint32_t u32Clamp(uint32_t low, uint32_t value, uint32_t high){
|
||||
static inline double floatClamp(double low, double value, double high){
|
||||
//low must always be less than high!
|
||||
return u32Max(low, u32Min(value, high));
|
||||
}
|
||||
|
||||
static inline uint64_t u64Min(uint64_t x, uint64_t y){
|
||||
return x < y ? x : y;
|
||||
}
|
||||
|
||||
static inline uint64_t u64Max(uint64_t x, uint64_t y){
|
||||
return x > y ? x : y;
|
||||
}
|
||||
|
||||
static inline uint64_t u64Clamp(uint64_t low, uint64_t value, uint64_t high){
|
||||
//low must always be less than high!
|
||||
return u64Max(low, u64Min(value, high));
|
||||
}
|
||||
|
||||
static inline int8_t s8Min(int8_t x, int8_t y){
|
||||
return x < y ? x : y;
|
||||
}
|
||||
|
||||
static inline int8_t s8Max(int8_t x, int8_t y){
|
||||
return x > y ? x : y;
|
||||
}
|
||||
|
||||
static inline int8_t s8Clamp(int8_t low, int8_t value, int8_t high){
|
||||
//low must always be less than high!
|
||||
return s8Max(low, s8Min(value, high));
|
||||
}
|
||||
|
||||
static inline int16_t s16Min(int16_t x, int16_t y){
|
||||
return x < y ? x : y;
|
||||
}
|
||||
|
||||
static inline int16_t s16Max(int16_t x, int16_t y){
|
||||
return x > y ? x : y;
|
||||
}
|
||||
|
||||
static inline int16_t s16Clamp(int16_t low, int16_t value, int16_t high){
|
||||
//low must always be less than high!
|
||||
return s16Max(low, s16Min(value, high));
|
||||
}
|
||||
|
||||
static inline int32_t s32Min(int32_t x, int32_t y){
|
||||
return x < y ? x : y;
|
||||
}
|
||||
|
||||
static inline int32_t s32Max(int32_t x, int32_t y){
|
||||
return x > y ? x : y;
|
||||
}
|
||||
|
||||
static inline int32_t s32Clamp(int32_t low, int32_t value, int32_t high){
|
||||
//low must always be less than high!
|
||||
return s32Max(low, s32Min(value, high));
|
||||
}
|
||||
|
||||
static inline int64_t s64Min(int64_t x, int64_t y){
|
||||
return x < y ? x : y;
|
||||
}
|
||||
|
||||
static inline int64_t s64Max(int64_t x, int64_t y){
|
||||
return x > y ? x : y;
|
||||
}
|
||||
|
||||
static inline int64_t s64Clamp(int64_t low, int64_t value, int64_t high){
|
||||
//low must always be less than high!
|
||||
return s64Max(low, s64Min(value, high));
|
||||
}
|
||||
|
||||
static inline float fMin(float x, float y){
|
||||
return x < y ? x : y;
|
||||
}
|
||||
|
||||
static inline float fMax(float x, float y){
|
||||
return x > y ? x : y;
|
||||
}
|
||||
|
||||
static inline float fClamp(float low, float value, float high){
|
||||
//low must always be less than high!
|
||||
return fMax(low, fMin(value, high));
|
||||
}
|
||||
|
||||
static inline double dMin(double x, double y){
|
||||
return x < y ? x : y;
|
||||
}
|
||||
|
||||
static inline double dMax(double x, double y){
|
||||
return x > y ? x : y;
|
||||
}
|
||||
|
||||
static inline double dClamp(double low, double value, double high){
|
||||
//low must always be less than high!
|
||||
return dMax(low, dMin(value, high));
|
||||
return floatMax(low, floatMin(value, high));
|
||||
}
|
||||
|
||||
//float platform safety
|
||||
|
||||
135
src/sdCard.c
135
src/sdCard.c
@@ -3,6 +3,7 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "emulator.h"
|
||||
#include "portability.h"
|
||||
#include "specs/sdCardCommandSpec.h"
|
||||
|
||||
|
||||
@@ -59,6 +60,21 @@ static uint16_t sdCardCrc16(uint8_t* data, uint16_t size){
|
||||
|
||||
#include "sdCardAccessors.c.h"
|
||||
|
||||
static void sdCardTopOffReadBuffer(void){
|
||||
//only call during a multi block read / palmSdCard.runningCommand == READ_MULTIPLE_BLOCK
|
||||
if(sdCardResponseFifoByteEntrys() < SD_CARD_BLOCK_SIZE){
|
||||
sdCardDoResponseDelay(1);
|
||||
if(palmSdCard.runningCommandVars[0] < palmSdCard.flashChip.size){
|
||||
sdCardDoResponseDataPacket(DATA_TOKEN_DEFAULT, palmSdCard.flashChip.data + palmSdCard.runningCommandVars[0], SD_CARD_BLOCK_SIZE);
|
||||
palmSdCard.runningCommandVars[0] += SD_CARD_BLOCK_SIZE;
|
||||
}
|
||||
else{
|
||||
sdCardDoResponseErrorToken(ET_OUT_OF_RANGE);
|
||||
palmSdCard.runningCommand = 0x00;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void sdCardReset(void){
|
||||
if(palmSdCard.flashChip.data){
|
||||
palmSdCard.command = UINT64_C(0x0000000000000000);
|
||||
@@ -100,19 +116,8 @@ bool sdCardExchangeBit(bool bit){
|
||||
outputValue = sdCardResponseFifoReadBit();
|
||||
|
||||
//if doing a multiblock read add data when running low
|
||||
if(palmSdCard.runningCommand == READ_MULTIPLE_BLOCK){
|
||||
if(sdCardResponseFifoByteEntrys() < SD_CARD_BLOCK_SIZE){
|
||||
sdCardDoResponseDelay(1);
|
||||
if(palmSdCard.runningCommandVars[0] < palmSdCard.flashChip.size){
|
||||
sdCardDoResponseDataPacket(DATA_TOKEN_DEFAULT, palmSdCard.flashChip.data + palmSdCard.runningCommandVars[0], SD_CARD_BLOCK_SIZE);
|
||||
palmSdCard.runningCommandVars[0] += SD_CARD_BLOCK_SIZE;
|
||||
}
|
||||
else{
|
||||
sdCardDoResponseErrorToken(ET_OUT_OF_RANGE);
|
||||
palmSdCard.runningCommand = 0x00;
|
||||
}
|
||||
}
|
||||
}
|
||||
if(palmSdCard.runningCommand == READ_MULTIPLE_BLOCK)
|
||||
sdCardTopOffReadBuffer();
|
||||
|
||||
//route received bit as command or data
|
||||
if(palmSdCard.receivingCommand){
|
||||
@@ -458,3 +463,107 @@ bool sdCardExchangeBit(bool bit){
|
||||
|
||||
return outputValue;
|
||||
}
|
||||
|
||||
//Exchanges "size" bits with the SD card one bit at a time through sdCardExchangeBit().
//The bits to send are the low "size" bits of "bits", most significant of those first.
//Returns the received bits in the same order, right aligned.
//size must be in the range 1<->32, a size of 0 transfers nothing and returns 0.
static uint32_t sdCardExchangeXBitsUnoptimized(uint32_t bits, uint8_t size){
   uint32_t returnBits = 0x00000000;
   uint32_t mask;
   uint8_t count;

   //size - 1 would be a negative shift amount
   if(size == 0)
      return returnBits;

   //unsigned constant, shifting a signed 1 into bit 31 is undefined behavior
   mask = UINT32_C(1) << (size - 1);

   for(count = 0; count < size; count++){
      returnBits <<= 1;
      returnBits |= sdCardExchangeBit(!!(bits & mask));
      //move the next bit to send under the mask
      bits <<= 1;
   }

   return returnBits;
}
|
||||
|
||||
uint32_t sdCardExchangeXBitsOptimized(uint32_t bits, uint8_t size){
|
||||
//does the same as the above function but skips any unneeded behavior for speed
|
||||
uint32_t returnBits = 0x00000000;
|
||||
uint32_t all1s = fillBottomWith1s(0, size);
|
||||
|
||||
//clear unused bits that are passed
|
||||
bits &= all1s;
|
||||
|
||||
if(palmSdCard.flashChip.data){
|
||||
bool ignoreCmdBits = palmSdCard.commandBitsRemaining == 48 && (bits == all1s || bits == 0x00000000);
|
||||
bool safeToOptimize = !palmSdCard.receivingCommand || ignoreCmdBits || palmSdCard.commandBitsRemaining > 47 && palmSdCard.commandBitsRemaining - size < 1;
|
||||
|
||||
if(safeToOptimize){
|
||||
//check for simple cases
|
||||
if(!palmSdCard.runningCommand || palmSdCard.runningCommand == READ_MULTIPLE_BLOCK){
|
||||
//nothing will happen until this transfer is over, do fast transfer and check if FIFO needs to be refilled
|
||||
|
||||
if(!ignoreCmdBits){
|
||||
palmSdCard.command <<= size;
|
||||
palmSdCard.command |= bits;
|
||||
palmSdCard.commandBitsRemaining -= size;
|
||||
}
|
||||
|
||||
//fill return FIFO if its getting low
|
||||
if(palmSdCard.runningCommand == READ_MULTIPLE_BLOCK)
|
||||
sdCardTopOffReadBuffer();
|
||||
|
||||
switch(size){
|
||||
case 32:
|
||||
returnBits |= sdCardResponseFifoReadByteOptimized() << 24;
|
||||
case 24:
|
||||
returnBits |= sdCardResponseFifoReadByteOptimized() << 16;
|
||||
case 16:
|
||||
returnBits |= sdCardResponseFifoReadByteOptimized() << 8;
|
||||
case 8:
|
||||
returnBits |= sdCardResponseFifoReadByteOptimized();
|
||||
break;
|
||||
|
||||
default:{
|
||||
//slow method
|
||||
uint8_t count;
|
||||
|
||||
for(count = 0; count < size; count++){
|
||||
returnBits <<= 1;
|
||||
returnBits |= sdCardResponseFifoReadBit();
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if(palmSdCard.runningCommand == WRITE_SINGLE_BLOCK || palmSdCard.runningCommand == WRITE_MULTIPLE_BLOCK){
|
||||
//just passthrough write data
|
||||
uint32_t currentByte = palmSdCard.runningCommandVars[2] / 8;
|
||||
bool alignedProperly = size % 8 == 0 && palmSdCard.runningCommandVars[2] % 8 == 0;
|
||||
|
||||
if(alignedProperly && currentByte > 0 && currentByte + size / 8 < SD_CARD_BLOCK_DATA_PACKET_SIZE - 1){
|
||||
//byte aligned in the middle of a data packet, can just copy data over
|
||||
uint8_t count;
|
||||
|
||||
for(count = 0; count < size / 8; count++){
|
||||
palmSdCard.runningCommandPacket[currentByte] = bits >> (size - 8) - (count * 8) & 0xFF;
|
||||
palmSdCard.runningCommandVars[2] += 8;
|
||||
currentByte++;
|
||||
returnBits <<= 8;
|
||||
returnBits |= sdCardResponseFifoReadByteOptimized();
|
||||
}
|
||||
}
|
||||
else{
|
||||
//not write safe
|
||||
returnBits = sdCardExchangeXBitsUnoptimized(bits, size);
|
||||
}
|
||||
}
|
||||
else{
|
||||
//unknown condition
|
||||
returnBits = sdCardExchangeXBitsUnoptimized(bits, size);
|
||||
}
|
||||
}
|
||||
else{
|
||||
//not safe to optimize :(
|
||||
returnBits = sdCardExchangeXBitsUnoptimized(bits, size);
|
||||
}
|
||||
}
|
||||
else{
|
||||
//not connected, fill with 1s
|
||||
returnBits = all1s;
|
||||
}
|
||||
|
||||
return returnBits;
|
||||
}
|
||||
|
||||
@@ -2,10 +2,12 @@
|
||||
#define SD_CARD_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
void sdCardReset(void);
|
||||
|
||||
void sdCardSetChipSelect(bool value);
|
||||
bool sdCardExchangeBit(bool bit);
|
||||
uint32_t sdCardExchangeXBitsOptimized(uint32_t bits, uint8_t size);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -19,6 +19,40 @@ static bool sdCardResponseFifoReadBit(void){
|
||||
return true;
|
||||
}
|
||||
|
||||
static uint8_t sdCardResponseFifoReadByteOptimized(void){
|
||||
if(sdCardResponseFifoByteEntrys() > 1){
|
||||
if(palmSdCard.responseReadPositionBit == 7){
|
||||
//just read the whole byte at once
|
||||
uint8_t byte = palmSdCard.responseFifo[palmSdCard.responseReadPosition];
|
||||
|
||||
palmSdCard.responseReadPosition = (palmSdCard.responseReadPosition + 1) % SD_CARD_RESPONSE_FIFO_SIZE;
|
||||
return byte;
|
||||
}
|
||||
else{
|
||||
//have to merge 2 bytes
|
||||
uint8_t byte = 0x00;
|
||||
|
||||
byte |= palmSdCard.responseFifo[palmSdCard.responseReadPosition] << 7 - palmSdCard.responseReadPositionBit;
|
||||
palmSdCard.responseReadPosition = (palmSdCard.responseReadPosition + 1) % SD_CARD_RESPONSE_FIFO_SIZE;
|
||||
byte |= palmSdCard.responseFifo[palmSdCard.responseReadPosition] >> palmSdCard.responseReadPositionBit + 1;
|
||||
return byte;
|
||||
}
|
||||
}
|
||||
else{
|
||||
//not enough bytes left, use slow accurate method
|
||||
uint8_t byte = 0x00;
|
||||
uint8_t count;
|
||||
|
||||
for(count = 0; count < 8; count++){
|
||||
byte <<= 1;
|
||||
byte |= sdCardResponseFifoReadBit();
|
||||
}
|
||||
|
||||
return byte;
|
||||
}
|
||||
return 0xFF;
|
||||
}
|
||||
|
||||
static void sdCardResponseFifoWriteByte(uint8_t value){
|
||||
if(sdCardResponseFifoByteEntrys() < SD_CARD_RESPONSE_FIFO_SIZE - 1){
|
||||
palmSdCard.responseFifo[palmSdCard.responseWritePosition] = value;
|
||||
|
||||
@@ -391,8 +391,8 @@ void sed1376Render(void){
|
||||
//debugLog("PIP state, start x:%d, end x:%d, start y:%d, end y:%d\n", pipStartX, pipEndX, pipStartY, pipEndY);
|
||||
//render PIP only if PIP window is onscreen
|
||||
if(pipStartX < 160 && pipStartY < 160){
|
||||
pipEndX = u16Min(pipEndX, 160);
|
||||
pipEndY = u16Min(pipEndY, 160);
|
||||
pipEndX = uintMin(pipEndX, 160);
|
||||
pipEndY = uintMin(pipEndY, 160);
|
||||
screenStartAddress = getPipStartAddress();
|
||||
lineSize = (sed1376Registers[PIP_LINE_SZ_1] << 8 | sed1376Registers[PIP_LINE_SZ_0]) * 4;
|
||||
MULTITHREAD_DOUBLE_LOOP(pixelX, pixelY) for(pixelY = pipStartY; pixelY < pipEndY; pixelY++)
|
||||
|
||||
Reference in New Issue
Block a user