From a3fb8e1f66976f243a4e81ba49cec1573bb3500f Mon Sep 17 00:00:00 2001 From: TascoDLX Date: Tue, 31 Dec 2024 21:21:05 -0500 Subject: [PATCH 1/2] md: fix 32X stereo pwm bug (from commit d7dfae7) --- ares/md/m32x/pwm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ares/md/m32x/pwm.cpp b/ares/md/m32x/pwm.cpp index f326d115b1..7252fa7020 100644 --- a/ares/md/m32x/pwm.cpp +++ b/ares/md/m32x/pwm.cpp @@ -35,7 +35,7 @@ auto M32X::PWM::main() -> void { while(counter >= 522) { counter -= 522; auto left = cycle > 0 ? lsample / (f32)cycle : 0; - auto right = cycle > 0 ? lsample / (f32)cycle : 0; + auto right = cycle > 0 ? rsample / (f32)cycle : 0; stream->frame(left, right); } From cc17a2f5c9b77ab1a852bc6a84d8826fbcd7a407 Mon Sep 17 00:00:00 2001 From: TascoDLX Date: Tue, 31 Dec 2024 21:23:46 -0500 Subject: [PATCH 2/2] md: vdp accuracy and timing fixes + CRAM dots - fix vdp pixel clock steps for accuracy (based on timings documented by Nemesis) - emulate dma & fifo latency; increased timing accuracy, including dma interactions with refresh - adjust sprite scan out timing (according to Mask of Destiny / BlastEm source) - fix screen framing and bugged PAL screen alignment - render cram write dot artifacts --- ares/md/system/serialization.cpp | 2 +- ares/md/vdp/dac.cpp | 55 +++++- ares/md/vdp/dma.cpp | 33 ++-- ares/md/vdp/fifo.cpp | 34 ++-- ares/md/vdp/io.cpp | 11 +- ares/md/vdp/main.cpp | 276 +++++++++++++++++-------------- ares/md/vdp/memory.cpp | 2 + ares/md/vdp/prefetch.cpp | 17 +- ares/md/vdp/serialization.cpp | 10 +- ares/md/vdp/sprite.cpp | 10 +- ares/md/vdp/vdp.cpp | 84 +++++----- ares/md/vdp/vdp.hpp | 37 +++-- 12 files changed, 349 insertions(+), 222 deletions(-) diff --git a/ares/md/system/serialization.cpp b/ares/md/system/serialization.cpp index a898c7d7d2..1813e99bc2 100644 --- a/ares/md/system/serialization.cpp +++ b/ares/md/system/serialization.cpp @@ -1,4 +1,4 @@ -static const string SerializerVersion = "v135"; +static const string 
SerializerVersion = "v141"; auto System::serialize(bool synchronize) -> serializer { if(synchronize) scheduler.enter(Scheduler::Mode::Synchronize); diff --git a/ares/md/vdp/dac.cpp b/ares/md/vdp/dac.cpp index c34165308b..58842182af 100644 --- a/ares/md/vdp/dac.cpp +++ b/ares/md/vdp/dac.cpp @@ -55,16 +55,61 @@ template auto VDP::DAC::pixel(u32 x) -> void { output<_h40>(pixel.backdrop << 11 | mode << 9 | color); } +auto pixelIndex(n9 hpos) -> maybe { + if(vdp.h40()) { + if(hpos < 0x00d || hpos > 0x167) return nothing; + return (hpos-0x00d)*4; + } else { + if(hpos < 0x00b || hpos > 0x125) return nothing; + return (hpos-0x00b)*5; + } +} + +template inline auto VDP::DAC::fillBorder(n8 ofst) -> void { + if(!pixels) return; + if(ofst >= _size) return; + + u32 hpos = (vdp.h40() ? _h40Pos : _h32Pos) + ofst; + u32 idx = pixelIndex(hpos)(); + n32 px = 0b101 << 9 | vdp.cram.color(vdp.io.backgroundColor); + for(auto n : range((_size-ofst)*(vdp.h40()?4:5))) + pixels[idx+n] = px; +} + +auto VDP::DAC::fillLeftBorder(n8 ofst) -> void { + fillBorder<13,0x00b,0x00d>(ofst); +} + +auto VDP::DAC::fillRightBorder(n8 ofst) -> void { + fillBorder<14,0x118,0x15a>(ofst); +} + +auto VDP::DAC::dot(n9 hpos, n9 color) -> void { + if(!pixels) return; + + if(auto i = pixelIndex(hpos)) { + u32 index = i(); + n32 px = 0b101 << 9 | color; + pixels[index++] = px; + pixels[index++] = px; + pixels[index++] = px; + pixels[index++] = px; + if(vdp.h40()) return; + pixels[index++] = px; + } +} + template auto VDP::DAC::output(n32 color) -> void { - *pixels++ = color; - *pixels++ = color; - *pixels++ = color; - *pixels++ = color; + *active++ = color; + *active++ = color; + *active++ = color; + *active++ = color; if(_h40) return; - *pixels++ = color; + *active++ = color; } auto VDP::DAC::power(bool reset) -> void { test = {}; pixels = nullptr; + active = nullptr; } diff --git a/ares/md/vdp/dma.cpp b/ares/md/vdp/dma.cpp index 03fb6a3917..b8aa4dae14 100644 --- a/ares/md/vdp/dma.cpp +++ b/ares/md/vdp/dma.cpp @@ 
-9,13 +9,21 @@ auto VDP::DMA::synchronize() -> void { } } +auto VDP::DMA::fetch() -> void { + if(active && !read) { + auto address = mode.bit(0) << 23 | source << 1; + data = bus.read(1, 1, address); + read = 1; + } +} + auto VDP::DMA::run() -> bool { if(vdp.command.pending && !wait) { - if(mode <= 1 && !vdp.fifo.full()) { + if(mode <= 1 && !vdp.fifo.full() && read) { return load(), true; - } else if(mode == 2 && vdp.fifo.empty()) { + } else if(mode == 2 && vdp.fifo.empty() && !vdp.state.rambusy) { return fill(), true; - } else if(mode == 3) { + } else if(mode == 3 && !vdp.state.rambusy) { return copy(), true; } } @@ -23,16 +31,16 @@ auto VDP::DMA::run() -> bool { } auto VDP::DMA::load() -> void { - if(delay > 0) { delay--; return; } + read = 0; + vdp.fifo.write(vdp.command.target, vdp.command.address, data); + auto address = mode.bit(0) << 23 | source << 1; - if(vdp.refreshing()) return; // bus not available - auto data = bus.read(1, 1, address); - vdp.writeDataPort(data); vdp.debugger.dmaLoad(address, vdp.command.target, vdp.command.address, data); source.bit(0,15)++; + vdp.command.address += vdp.command.increment; if(--length == 0) { - vdp.command.pending = 0; + vdp.command.pending = 0; wait = 1; preload = 0; synchronize(); } } @@ -43,12 +51,14 @@ auto VDP::DMA::fill() -> void { case 3: vdp.cram.write(vdp.command.address >> 1, data); break; case 5: vdp.vsram.write(vdp.command.address >> 1, data); break; } + vdp.state.rambusy = 1; + vdp.debugger.dmaFill(vdp.command.target, vdp.command.address, data); source.bit(0,15)++; vdp.command.address += vdp.command.increment; if(--length == 0) { - vdp.command.pending = 0; + vdp.command.pending = 0; wait = 1; synchronize(); } } @@ -58,17 +68,20 @@ auto VDP::DMA::copy() -> void { if(!read) { read = 1; data = vdp.vram.readByte(source ^ 1); + vdp.state.rambusy = 1; return; } read = 0; vdp.vram.writeByte(vdp.command.address ^ 1, data); + vdp.state.rambusy = 1; + vdp.debugger.dmaCopy(source, vdp.command.target, 
vdp.command.address ^ 1, data); source.bit(0,15)++; vdp.command.address += vdp.command.increment; if(--length == 0) { - vdp.command.pending = 0; + vdp.command.pending = 0; wait = 1; synchronize(); } } diff --git a/ares/md/vdp/fifo.cpp b/ares/md/vdp/fifo.cpp index 9f36c010b7..e2d2f651a0 100644 --- a/ares/md/vdp/fifo.cpp +++ b/ares/md/vdp/fifo.cpp @@ -1,4 +1,19 @@ +auto VDP::FIFO::tick() -> void { + for(auto& slot : slots) + if(!slot.empty() && slot.latency > 0) + slot.latency--; +} + auto VDP::FIFO::advance() -> void { + if(vdp.command.pending && vdp.dma.mode == 2) { + if(slots[0].target == 1) + vdp.dma.data = slots[0].data; + else + vdp.dma.data = slots[1].data; // fill data taken from next fifo slot (late fetch) + vdp.dma.read = 1; + vdp.dma.wait = 0; // start pending DMA if necessary + } + swap(slots[0], slots[1]); swap(slots[1], slots[2]); swap(slots[2], slots[3]); @@ -6,6 +21,8 @@ auto VDP::FIFO::advance() -> void { auto VDP::FIFO::run() -> bool { if(empty()) return false; + if(slots[0].latency > 0) return false; + if(vdp.dma.active && vdp.dma.preload > 0) return false; if(slots[0].target == 1 && vdp.vram.mode == 0) { if(slots[0].lower) { @@ -16,10 +33,6 @@ auto VDP::FIFO::run() -> bool { if(slots[0].upper) { slots[0].upper = 0; vdp.vram.writeByte(slots[0].address, slots[0].data.byte(1)); - if(vdp.command.pending && vdp.dma.mode == 2) { - vdp.dma.data = slots[0].data; - vdp.dma.wait = 0; // start pending DMA - } return advance(), true; } } @@ -36,13 +49,6 @@ auto VDP::FIFO::run() -> bool { if(slots[0].upper) { slots[0].upper = 0; // null action - if(vdp.command.pending && vdp.dma.mode == 2) { - // trigger action here is speculative/untested - // but it follows from the (normal) write case - debug(unusual, "[VDP::FIFO] dma fill start"); - vdp.dma.data = slots[0].data; - vdp.dma.wait = 0; // start pending DMA - } return advance(), true; } } @@ -56,11 +62,6 @@ auto VDP::FIFO::run() -> bool { else debug(unusual, "[VDP::FIFO] write target = 0x", 
hex(slots[0].target)); - if(vdp.command.pending && vdp.dma.mode == 2) { - vdp.dma.data = slots[1].data; // fill data taken from next fifo slot (late fetch) - vdp.dma.wait = 0; // start pending DMA - } - slots[0].lower = 0; slots[0].upper = 0; return advance(), true; @@ -83,6 +84,7 @@ auto VDP::FIFO::write(n4 target, n17 address, n16 data) -> void { slot.data = data; slot.upper = 1; slot.lower = 1; + slot.latency = 2; return; } } diff --git a/ares/md/vdp/io.cpp b/ares/md/vdp/io.cpp index 7ed811eb65..50fd3628ef 100644 --- a/ares/md/vdp/io.cpp +++ b/ares/md/vdp/io.cpp @@ -154,7 +154,7 @@ auto VDP::readControlPort() -> n16 { result.bit( 0) = Region::PAL(); result.bit( 1) = command.pending; result.bit( 2) = hblank(); - result.bit( 3) = vblank() || !io.displayEnable; + result.bit( 3) = vblank() || !displayEnable(); result.bit( 4) = io.interlaceMode.bit(0) && field(); result.bit( 5) = sprite.collision; result.bit( 6) = sprite.overflow; @@ -187,8 +187,13 @@ auto VDP::writeControlPort(n16 data) -> void { prefetch.read(command.target, command.address); - if(command.pending && dma.mode == 1) dma.delay = 4; // based on measurement noted by Mask of Destiny - dma.wait = dma.mode == 2; + if(command.pending && dma.mode != 2) { + // dma preload init is tuned based on Direct Color DMA demos; lower values lead to instability + if(dma.mode < 2) dma.preload = 7; + dma.read = 0; + dma.wait = 0; + } + dma.synchronize(); return; } diff --git a/ares/md/vdp/main.cpp b/ares/md/vdp/main.cpp index 28aefb23b6..754384ab30 100644 --- a/ares/md/vdp/main.cpp +++ b/ares/md/vdp/main.cpp @@ -3,24 +3,68 @@ auto VDP::step(u32 clocks) -> void { Thread::synchronize(cpu); } -template auto VDP::tick() -> void { - step(cycles[0] + cycles[1]); - cycles += 2; - state.hcounter++; +template auto VDP::fullslotStep() -> void { + // EDCLK hybrid rate : 15cyc @ MClk/5 + 2cyc @ MClk/4 + static u8 EDCLK[17] = {5,5,5,5,5, 5,5,5,5,5, 5,5,5,5,5, 4,4}; + if(( _h40 && latch.clockSelect && hcounter() >= 0xe6 && hcounter() 
<= 0xf6) || + (!_h40 && latch.clockSelect && hcounter() >= 0xec && hcounter() <= 0xfc)) { + u32 q1 = EDCLK[state.edclkPos]; + state.edclkPos = (state.edclkPos+1) % 17; + u32 q2 = EDCLK[state.edclkPos]; + state.edclkPos = (state.edclkPos+1) % 17; + u32 q3 = EDCLK[state.edclkPos]; + state.edclkPos = (state.edclkPos+1) % 17; + u32 q4 = EDCLK[state.edclkPos]; + state.edclkPos = (state.edclkPos+1) % 17; + step(q1+q2+q3+q4); + return; + } + if(_h40) + step(4+4+4+4); // MClk/4 + else + step(5+5+5+5); // MClk/5 +} - if(_h40) { - if(hcounter() == 0x00) hblank(0), vedge(); - else if(hcounter() == 0xa3) vtick(); - else if(hcounter() == 0xb3) hblank(1); - else if(hcounter() == 0xb6) state.hcounter = 0xe4; +template auto VDP::tick() -> void { + // Run DMA here -- fifo & prefetch have ram priority, so some ops may be blocked + dma.run(); + + fullslotStep<_h40>(); + htick<_h40>(); // +2 pixels + + if(cram.bus.active) { + vdp.dac.dot(hcounter()*2+1, cram.bus.data); + vdp.dac.dot(hcounter()*2+2, cram.bus.data); + // DAC dot artifacts may be drawn continuously in the case of consecutive writes. + // We can detect for this by checking for an impending CRAM write thru the fifo. + // If refresh or fifo delay occurs, the data will not be updated, resulting in an extended dot. + if(displayEnable() || fifo.slots[0].empty() || fifo.slots[0].target != 3) cram.bus.active = 0; + } + + // There is reportedly a latch effect when enabling the display, but it might be a fixed delay + // rather than a wait until the fifo clears. So, this is precautionary and not necessarily correct. + if(latch.displayEnable > io.displayEnable || fifo.empty()) + latch.displayEnable = io.displayEnable; + + if(_refresh) { + vram.refreshing = 1; + + // The start of a DMA load will be aligned if it coincides with a refresh slot. + // The duration may differ between H32 & H40 due to pixel clock, or this may just + // be the result of some other emulation inaccuracy. Either way, this works for now. 
+ if(dma.active && dma.preload > 0) dma.preload = h40()?6:4; } else { - if(hcounter() == 0x00) hblank(0), vedge(); - else if(hcounter() == 0x83) vtick(); - else if(hcounter() == 0x93) hblank(1); - else if(hcounter() == 0x94) state.hcounter = 0xe9; + fifo.tick(); + + // When display is blanked, DMA load fetch may be performed in every slot + // except for refresh slots and any slot immediately following refresh. + if(dma.active && !vram.refreshing) dma.fetch(); + vram.refreshing = 0; + + if(dma.active && dma.preload > 0) dma.preload--; } - irq.poll(); + state.rambusy = 1; } auto VDP::vblankcheck() -> void { @@ -34,6 +78,26 @@ auto VDP::vblankcheck() -> void { } } +template auto VDP::htick() -> void { + state.hcounter++; + + if(_h40) { + if(hcounter() == 0x00) vedge(); + else if(hcounter() == 0x05) hblank(0); + else if(hcounter() == 0xa5) vtick(); + else if(hcounter() == 0xb3) hblank(1); + else if(hcounter() == 0xb6) state.hcounter = 0xe4; + } else { + if(hcounter() == 0x00) vedge(); + else if(hcounter() == 0x05) hblank(0); + else if(hcounter() == 0x85) vtick(); + else if(hcounter() == 0x93) hblank(1); + else if(hcounter() == 0x94) state.hcounter = 0xe9; + } + + irq.poll(); +} + auto VDP::vtick() -> void { if(vblank()) { irq.hblank.counter = irq.hblank.frequency; @@ -43,15 +107,10 @@ auto VDP::vtick() -> void { debugger.interrupt(CPU::Interrupt::HorizontalBlank); } - state.vcounter++; - if(v28()) { - if(vcounter() == 0x0eb && Region::NTSC()) state.vcounter = 0x1e5; - if(vcounter() == 0x103 && Region::PAL ()) state.vcounter = 0x1ca; - } - if(v30()) { - if(vcounter() == 0x200 && Region::NTSC()) state.vcounter = 0x000; - if(vcounter() == 0x10b && Region::PAL ()) state.vcounter = 0x1d2; - } + if(vcounter() == state.bottomline) + state.vcounter = state.topline; + else + state.vcounter++; vblankcheck(); } @@ -67,6 +126,7 @@ auto VDP::hblank(bool line) -> void { auto VDP::vblank(bool line) -> void { irq.vblank.transitioned |= state.vblank ^ line; + if(state.vblank > line) 
state.topline = vcounter(); state.vblank = line; } @@ -86,31 +146,32 @@ auto VDP::vedge() -> void { } auto VDP::slot() -> void { - if(!fifo.run()) prefetch.run(); - dma.run(); -} - -auto VDP::refresh(bool active) -> void { - vram.refreshing = active; + state.rambusy = 0; + if(!(state.rambusy = fifo.run())) + state.rambusy = prefetch.run(); } auto VDP::main() -> void { latch.displayWidth = io.displayWidth; latch.clockSelect = io.clockSelect; + state.edclkPos = 0; if(h32()) mainH32(); + else if(h40()) mainH40(); - if(vcounter() == 0) { + if(vcounter() == state.bottomline) { screen->setColorBleedWidth(latch.displayWidth ? 4 : 5); latch.interlace = io.interlaceMode == 3; latch.overscan = io.overscan; frame(); state.field ^= 1; + updateScreenParams(); } } auto VDP::mainH32() -> void { - auto pixels = dac.pixels = vdp.pixels(); - cycles = &cyclesH32[edclk()][0]; + dac.pixels = vdp.pixels(); + auto pixels = dac.active = dac.pixels+13*5; + state.hcounter = 0; sprite.begin(); if(dac.pixels) blocks(); @@ -124,24 +185,30 @@ auto VDP::mainH32() -> void { layers.vscrollFetch(); sprite.end(); - for(auto cycle : range(13)) { + for(auto cycle : range(4)) { tick(); sprite.patternFetch(cycle + 0); } - tick(); slot(); for(auto cycle : range(13)) { - tick(); sprite.patternFetch(cycle + 13); + tick(); sprite.patternFetch(cycle + 4); sprite.scan(); + } + // Placement of this free slot conflicts with documentation by Nemesis which has it 4 slots earlier, + // but this works more reliably with the Direct Color DMA demos. 
+ tick(); slot(); + // window begin call (reg latch) is placed here due to garbage line edge case in International Superstar Soccer Deluxe (E) + window.begin(); + for(auto cycle : range(9)) { + tick(); sprite.patternFetch(cycle + 17); sprite.scan(); } tick(); slot(); layerA.begin(); layerB.begin(); - window.begin(); tick(); layers.hscrollFetch(); - tick(); sprite.patternFetch(26); - tick(); sprite.patternFetch(27); - tick(); sprite.patternFetch(28); - tick(); sprite.patternFetch(29); + tick(); sprite.patternFetch(26); sprite.scan(); + tick(); sprite.patternFetch(27); sprite.scan(); + tick(); sprite.patternFetch(28); sprite.scan(); + tick(); sprite.patternFetch(29); sprite.scan(); layers.vscrollFetch(-1); layerA.attributesFetch(); @@ -149,18 +216,23 @@ auto VDP::mainH32() -> void { window.attributesFetch(-1); tick(); layerA.mappingFetch(-1); - tick(); !displayEnable() ? refresh(true) : sprite.patternFetch(30); - tick(); layerA.patternFetch( 0); refresh(false); - tick(); layerA.patternFetch( 1); + if(!displayEnable()) { + tick(); //refresh + } else { + tick(); sprite.patternFetch(30); sprite.scan(); + } + tick(); layerA.patternFetch( 0); sprite.scan(); + tick(); layerA.patternFetch( 1); sprite.scan(); tick(); layerB.mappingFetch(-1); - tick(); sprite.patternFetch(31); - tick(); layerB.patternFetch( 0); - tick(); layerB.patternFetch( 1); + tick(); sprite.patternFetch(31); sprite.scan(); + tick(); layerB.patternFetch( 0); sprite.scan(); + tick(); layerB.patternFetch( 1); sprite.scan(); } auto VDP::mainH40() -> void { - auto pixels = dac.pixels = vdp.pixels(); - cycles = &cyclesH40[edclk()][0]; + dac.pixels = vdp.pixels(); + auto pixels = dac.active = dac.pixels+13*4; + state.hcounter = 0; sprite.begin(); if(dac.pixels) blocks(); @@ -174,12 +246,15 @@ auto VDP::mainH40() -> void { layers.vscrollFetch(); sprite.end(); - for(auto cycle : range(23)) { + for(auto cycle : range(4)) { tick(); sprite.patternFetch(cycle + 0); } + for(auto cycle : range(19)) { + tick(); 
sprite.patternFetch(cycle + 4); sprite.scan(); + } tick(); slot(); for(auto cycle : range(11)) { - tick(); sprite.patternFetch(cycle + 23); + tick(); sprite.patternFetch(cycle + 23); sprite.scan(); } layerA.begin(); @@ -187,10 +262,10 @@ auto VDP::mainH40() -> void { window.begin(); tick(); layers.hscrollFetch(); - tick(); sprite.patternFetch(34); - tick(); sprite.patternFetch(35); - tick(); sprite.patternFetch(36); - tick(); sprite.patternFetch(37); + tick(); sprite.patternFetch(34); sprite.scan(); + tick(); sprite.patternFetch(35); sprite.scan(); + tick(); sprite.patternFetch(36); sprite.scan(); + tick(); sprite.patternFetch(37); sprite.scan(); layers.vscrollFetch(-1); layerA.attributesFetch(); @@ -198,26 +273,35 @@ auto VDP::mainH40() -> void { window.attributesFetch(-1); tick(); layerA.mappingFetch(-1); - tick(); !displayEnable() ? refresh(true) : sprite.patternFetch(38); - tick(); layerA.patternFetch( 0); refresh(false); - tick(); layerA.patternFetch( 1); + if(!displayEnable()) { + tick(); //refresh + } else { + tick(); sprite.patternFetch(38); sprite.scan(); + } + tick(); layerA.patternFetch( 0); sprite.scan(); + tick(); layerA.patternFetch( 1); sprite.scan(); tick(); layerB.mappingFetch(-1); - tick(); sprite.patternFetch(39); - tick(); layerB.patternFetch( 0); - tick(); layerB.patternFetch( 1); + tick(); sprite.patternFetch(39); sprite.scan(); + tick(); layerB.patternFetch( 0); sprite.scan(); + tick(); layerB.patternFetch( 1); sprite.scan(); } template auto VDP::blocks() -> void { - bool den = displayEnable(); - bool vc = vcounter() == 0x1ff; + bool top = vcounter() == state.topline; + dac.fillLeftBorder(); for(auto block : range(_h40 ? 20 : 16)) { layers.vscrollFetch(block); layerA.attributesFetch(); layerB.attributesFetch(); window.attributesFetch(block); tick<_h40>(); layerA.mappingFetch(block); - tick<_h40>(); (block & 3) != 3 ? 
slot() : refresh(true); - tick<_h40>(); layerA.patternFetch(block * 2 + 2); refresh(false); + if((block & 3) == 3) { + tick<_h40,true>(); //refresh + } else { + tick<_h40>(); slot(); + } + bool den = displayEnable(); + tick<_h40>(); layerA.patternFetch(block * 2 + 2); tick<_h40>(); layerA.patternFetch(block * 2 + 3); tick<_h40>(); layerB.mappingFetch(block); tick<_h40>(); sprite.mappingFetch(block); @@ -225,73 +309,13 @@ template auto VDP::blocks() -> void { tick<_h40>(); layerB.patternFetch(block * 2 + 3); if(_pixels) { - if(!den || vc) { + if(!den || top) { for(auto pixel: range(16)) dac.pixel<_h40, false>(block * 16 + pixel); } else { for(auto pixel: range(16)) dac.pixel<_h40, true>(block * 16 + pixel); } } } + dac.fillRightBorder(); } -//timings are approximations; exact positions of slow/normal/fast cycles are not known -auto VDP::generateCycleTimings() -> void { - //full lines - //========== - - //H32/DCLK: 342 slow + 0 normal + 0 fast = 3420 cycles - for(auto cycle : range(342)) cyclesH32[0][cycle * 1] = 10; - - //H32/EDCLK: 21 slow + 3 normal + 318 fast = 2781 cycles - for(auto cycle : range(342)) cyclesH32[1][cycle * 1] = 8; - for(auto cycle : range( 24)) cyclesH32[1][cycle * 14] = 10; - for(auto cycle : range( 3)) cyclesH32[1][cycle * 14] = 9; - - //H40/DCLK: 0 slow + 0 normal + 420 fast = 3360 cycles - for(auto cycle : range(420)) cyclesH40[0][cycle * 1] = 8; - - //H40/EDCLK: 28 slow + 4 normal + 388 fast = 3420 cycles - for(auto cycle : range(420)) cyclesH40[1][cycle * 1] = 8; - for(auto cycle : range( 32)) cyclesH40[1][cycle * 13] = 10; - for(auto cycle : range( 4)) cyclesH40[1][cycle * 13] = 9; - - //half lines - //========== - - //H32/DCLK: 171 slow + 0 normal + 0 fast = 1710 cycles - for(auto cycle : range(171)) halvesH32[0][cycle * 1] = 10; - - //H32/EDCLK: 10 slow + 2 normal + 159 fast = 1390 cycles - for(auto cycle : range(171)) halvesH32[1][cycle * 1] = 8; - for(auto cycle : range( 12)) halvesH32[1][cycle * 14] = 10; - for(auto cycle : range( 
2)) halvesH32[1][cycle * 14] = 9; - - //H40/DCLK: 0 slow + 0 normal + 210 fast = 1680 cycles - for(auto cycle : range(210)) halvesH40[0][cycle * 1] = 8; - - //H40/EDCLK: 14 slow + 2 normal + 194 fast = 1710 cycles - for(auto cycle : range(210)) halvesH40[1][cycle * 1] = 8; - for(auto cycle : range( 16)) halvesH40[1][cycle * 13] = 10; - for(auto cycle : range( 2)) halvesH40[1][cycle * 13] = 9; - - //active even half lines - //====================== - - //H32/DCLK: 171 slow + 0 normal + 0 fast = 1710 cycles - for(auto cycle : range(171)) extrasH32[0][cycle * 1] = 10; - - //H32/EDCLK: 21 slow + 3 normal + 147 fast = 1413 cycles - for(auto cycle : range(171)) extrasH32[1][cycle * 1] = 8; - for(auto cycle : range( 24)) extrasH32[1][cycle * 7] = 10; - for(auto cycle : range( 3)) extrasH32[1][cycle * 7] = 9; - - //H40/DCLK: 0 slow + 0 normal + 210 fast = 1680 cycles - for(auto cycle : range(171)) extrasH40[0][cycle * 1] = 8; - - //H40/EDCLK: 28 slow + 4 normal + 178 fast = 1740 cycles - for(auto cycle : range(171)) extrasH40[1][cycle * 1] = 8; - for(auto cycle : range( 32)) extrasH40[1][cycle * 5] = 10; - for(auto cycle : range( 4)) extrasH40[1][cycle * 5] = 9; - - cycles = nullptr; -} diff --git a/ares/md/vdp/memory.cpp b/ares/md/vdp/memory.cpp index bd686d92b8..0f99d78f49 100644 --- a/ares/md/vdp/memory.cpp +++ b/ares/md/vdp/memory.cpp @@ -50,4 +50,6 @@ auto VDP::CRAM::read(n6 address) const -> n16 { auto VDP::CRAM::write(n6 address, n16 data) -> void { data = data.bit(1,3) << 0 | data.bit(5,7) << 3 | data.bit(9,11) << 6; memory[address] = data; + bus.data = data; + bus.active = 1; } diff --git a/ares/md/vdp/prefetch.cpp b/ares/md/vdp/prefetch.cpp index 79336ba05e..6bea4cd698 100644 --- a/ares/md/vdp/prefetch.cpp +++ b/ares/md/vdp/prefetch.cpp @@ -2,12 +2,17 @@ auto VDP::Prefetch::run() -> bool { if(full()) return false; if(vdp.command.target == 0 && vdp.vram.mode == 0) { - slot.lower = 1; - slot.upper = 1; - slot.data.byte(0) = vdp.vram.readByte(vdp.command.address & 
~1 | 1); - slot.data.byte(1) = vdp.vram.readByte(vdp.command.address & ~1 | 0); - vdp.command.ready = 1; - return true; + if(!slot.lower) { + slot.lower = 1; + slot.data.byte(0) = vdp.vram.readByte(vdp.command.address & ~1 | 1); + return true; + } + if(!slot.upper) { + slot.data.byte(1) = vdp.vram.readByte(vdp.command.address & ~1 | 0); + slot.upper = 1; + vdp.command.ready = 1; + return true; + } } if(vdp.command.target == 0 && vdp.vram.mode == 1) { diff --git a/ares/md/vdp/serialization.cpp b/ares/md/vdp/serialization.cpp index 4bc140f1ca..26c8dfba07 100644 --- a/ares/md/vdp/serialization.cpp +++ b/ares/md/vdp/serialization.cpp @@ -46,6 +46,7 @@ auto VDP::serialize(serializer& s) -> void { s(latch.overscan); s(latch.displayWidth); s(latch.clockSelect); + s(latch.displayEnable); s(state.counterLatchValue); s(state.hcounter); @@ -54,6 +55,10 @@ auto VDP::serialize(serializer& s) -> void { s(state.hblank); s(state.vblank); s(state.refreshing); + s(state.rambusy); + s(state.edclkPos); + s(state.topline); + s(state.bottomline); } auto VDP::PSG::serialize(serializer& s) -> void { @@ -83,6 +88,7 @@ auto VDP::Slot::serialize(serializer& s) -> void { s(data); s(upper); s(lower); + s(latency); } auto VDP::Prefetch::serialize(serializer& s) -> void { @@ -102,7 +108,7 @@ auto VDP::DMA::serialize(serializer& s) -> void { s(wait); s(read); s(enable); - s(delay); + s(preload); } auto VDP::Pixel::serialize(serializer& s) -> void { @@ -217,5 +223,7 @@ auto VDP::VSRAM::serialize(serializer& s) -> void { } auto VDP::CRAM::serialize(serializer& s) -> void { + s(bus.active); + s(bus.data); s(memory); } diff --git a/ares/md/vdp/sprite.cpp b/ares/md/vdp/sprite.cpp index 0b586c2a2c..606ae324ed 100644 --- a/ares/md/vdp/sprite.cpp +++ b/ares/md/vdp/sprite.cpp @@ -85,8 +85,6 @@ auto VDP::Sprite::patternFetch(u32) -> void { if(test.disablePhase3) mappings[patternIndex].valid = 0; auto interlace = vdp.io.interlaceMode == 3; - auto y = 129 + (i9)vdp.vcounter(); - if(interlace) y = y << 1 | 
vdp.field(); if(mappings[patternIndex].valid) { auto& object = mappings[patternIndex]; @@ -134,6 +132,14 @@ auto VDP::Sprite::patternFetch(u32) -> void { } else { maskCheck = 0; } +} + +auto VDP::Sprite::scan() -> void { + if(!vdp.displayEnable()) return; + + auto interlace = vdp.io.interlaceMode == 3; + auto y = 129 + (i9)vdp.vcounter(); + if(interlace) y = y << 1 | vdp.field(); if(test.disablePhase1) visibleStop = 1; diff --git a/ares/md/vdp/vdp.cpp b/ares/md/vdp/vdp.cpp index 26684ddfd0..5dba6797e4 100644 --- a/ares/md/vdp/vdp.cpp +++ b/ares/md/vdp/vdp.cpp @@ -34,17 +34,15 @@ VDP vdp; auto VDP::load(Node::Object parent) -> void { node = parent->append("VDP"); - screen = node->append("Screen", 1388, visibleHeight() * 2); + screen = node->append("Screen", 1415, visibleHeight() * 2); screen->colors(1 << 16, {&VDP::color, this}); - screen->setSize(1388, visibleHeight() * 2); + screen->setSize(1415, visibleHeight() * 2); screen->setScale(0.25, 0.5); - Region::PAL() ? screen->setAspect(111.0, 100.0) : screen->setAspect(32.0, 35.0); + Region::PAL() ? 
screen->setAspect(41.0, 37.0) : screen->setAspect(32.0, 35.0); screen->refreshRateHint(system.frequency(), 3420, frameHeight()); psg.load(node); debugger.load(node); - - generateCycleTimings(); } auto VDP::unload() -> void { @@ -56,32 +54,44 @@ auto VDP::unload() -> void { node.reset(); } -auto VDP::pixels() -> u32* { - //TODO: vcounter values of top border may not be correct here +auto VDP::updateScreenParams() -> void { + if(Region::NTSC() && v28()) { state.topline = 0x1e5; state.bottomline = 0x0ea; } + if(Region::NTSC() && v30()) { state.topline = 0x000; state.bottomline = 0x1ff; } + if(Region::PAL() && v28()) { state.topline = 0x1ca; state.bottomline = 0x102; } + if(Region::PAL() && v30()) { state.topline = 0x1d2; state.bottomline = 0x10a; } +} +auto VDP::pixels() -> u32* { u32* output = nullptr; - if(Region::NTSC() && vcounter() >= 0x1ed) return nullptr; - if(Region::PAL() && vcounter() >= 0x1f0) return nullptr; - - //account for vcounter jumps during blanking periods n9 y = vcounter(); - if(Region::NTSC() && v28() && vcounter() >= 0x1e5) y -= 250; - if(Region::PAL() && v28() && vcounter() >= 0x1ca) y -= 201; - if(v30() && Region::PAL() && vcounter() >= 0x1d2) y -= 201; - auto offset = Region::PAL() ? 
38 : 11; - if(latch.overscan) offset -= 8; + // disregard blanked lines + if(Region::NTSC() && y >= 0x0e8 && y < 0x1f5) return nullptr; + if(Region::PAL() && latch.overscan && y >= 0x108 && y < 0x1e2) return nullptr; + if(Region::PAL() && !latch.overscan && y >= 0x100 && y < 0x1da) return nullptr; - y = (y + offset) % visibleHeight(); + // adjust vcounter to account for vsync period & vcounter jump + if(Region::NTSC() && y >= 0x0e8) y -= (0x1f5 - 0x0e8); + if(Region::PAL() && y >= 0x108) y -= (0x1e2 - 0x108); - output = screen->pixels().data() + y * 2 * 1388; - if(latch.interlace) output += field() * 1388; + // adjust for top border + if(Region::NTSC()) y += 11; + if(Region::PAL() ) y += 38 - 8 * latch.overscan; + y = y % visibleHeight(); - //TODO: this should probably be handled in DAC - n32 bg = 1 << 11 | 1 << 9 | cram.color(io.backgroundColor); - for(auto n: range(1388)) output[n] = bg; + output = screen->pixels().data() + y * 2 * 1415; + if(latch.interlace) output += field() * 1415; - return output + 52; + if(h40()) { + // H40 mode has slightly shorter lines, so sides are blanked. + // Left side would be 13 wide, but we'll realign to whole pixel (3*4) for sanity. 
+ for(auto n: range(12)) output[ n] = 0b101 << 9; + for(auto n: range(15)) output[1415-15+n] = 0b101 << 9; + + return output+12; + } + + return output; } auto VDP::frame() -> void { @@ -89,26 +99,16 @@ auto VDP::frame() -> void { if(latch.interlace == 1) screen->setInterlace(field()); if(screen->overscan()) { - screen->setSize(1388, visibleHeight() * 2); + screen->setSize(1415, visibleHeight() * 2); screen->setViewport(0, 0, screen->width(), screen->height()); } else { - int x = 14 * 4; - int y = 12 * 2; - int width = 1388 - (28 * 4); - int height = (visibleHeight() * 2) - (24 * 2); - - if(Region::PAL()) { - y += 28 * 2; - height -= 48 * 2; - - if(v30()) { - y -= 8 * 2; - height += 16 * 2; - } - } - - screen->setSize(width, height); - screen->setViewport(x, y, width, height); + int x = 13 * 5; + int y = Region::PAL() ? 30 + 8 * v28() : 11; + int width = 1280; + int height = screenHeight(); + + screen->setSize(width, height * 2); + screen->setViewport(x, y * 2, width, height * 2); } screen->frame(); @@ -127,6 +127,8 @@ auto VDP::power(bool reset) -> void { vram.mode = 0; vram.refreshing = 0; + cram.bus.active = 0; + cram.bus.data = 0; command = {}; io = {}; test = {}; diff --git a/ares/md/vdp/vdp.hpp b/ares/md/vdp/vdp.hpp index 75982481e4..dc5bcc242f 100644 --- a/ares/md/vdp/vdp.hpp +++ b/ares/md/vdp/vdp.hpp @@ -38,7 +38,7 @@ struct VDP : Thread { auto hblank() const -> bool { return state.hblank; } auto vblank() const -> bool { return state.vblank; } auto refreshing() const -> bool { return vram.refreshing; } - auto displayEnable() const -> bool { return io.displayEnable && !state.vblank; } + auto displayEnable() const -> bool { return latch.displayEnable && !state.vblank; } auto h32() const -> bool { return latch.displayWidth == 0; } //256-width auto h40() const -> bool { return latch.displayWidth == 1; } //320-width @@ -51,32 +51,33 @@ struct VDP : Thread { auto screenWidth() const -> u32 { return latch.displayWidth ? 
320 : 256; } auto screenHeight() const -> u32 { return io.overscan ? 240 : 224; } - auto frameHeight() const -> u32 { return Region::PAL() ? 312 : 262; } + auto frameHeight() const -> u32 { return Region::PAL() ? 313 : 262; } auto visibleHeight() const -> u32 { return Region::PAL() ? 294 : 243; } //vdp.cpp auto load(Node::Object) -> void; auto unload() -> void; + auto updateScreenParams() -> void; auto pixels() -> u32*; auto frame() -> void; auto power(bool reset) -> void; //main.cpp auto step(u32 clocks) -> void; - template auto tick() -> void; + template auto fullslotStep() -> void; + template auto tick() -> void; + template auto htick() -> void; auto vtick() -> void; auto hblank(bool line) -> void; auto vblank(bool line) -> void; auto vblankcheck() -> void; auto vedge() -> void; auto slot() -> void; - auto refresh(bool active) -> void; auto main() -> void; auto render() -> void; auto mainH32() -> void; auto mainH40() -> void; template auto blocks() -> void; - auto generateCycleTimings() -> void; //io.cpp auto read(n1 upper, n1 lower, n24 address, n16 data) -> n16; @@ -156,6 +157,7 @@ struct VDP : Thread { n16 data; //write data n1 upper; //1 = data.byte(1) valid n1 lower; //1 = data.byte(0) valid + u8 latency; }; struct Prefetch { @@ -178,6 +180,7 @@ struct VDP : Thread { auto full() const -> bool { return !slots[3].empty(); } //fifo.cpp + auto tick() -> void; auto advance() -> void; auto run() -> bool; auto write(n4 target, n17 address, n16 data) -> void; @@ -192,6 +195,7 @@ struct VDP : Thread { struct DMA { //dma.cpp auto synchronize() -> void; + auto fetch() -> void; auto run() -> bool; auto load() -> void; auto fill() -> void; @@ -209,7 +213,7 @@ struct VDP : Thread { n1 wait; n1 read; n1 enable; - n4 delay; + n4 preload; } dma; struct Pixel { @@ -328,6 +332,7 @@ struct VDP : Thread { auto end() -> void; auto mappingFetch(u32) -> void; auto patternFetch(u32) -> void; + auto scan() -> void; auto pixel(u32 x) -> Pixel; auto power(bool reset) -> void; @@ 
-390,6 +395,10 @@ struct VDP : Thread { //dac.cpp template auto pixel(u32 x) -> void; template auto output(n32 color) -> void; + template inline auto fillBorder(n8 ofst) -> void; + auto fillLeftBorder(n8 ofst = 0) -> void; + auto fillRightBorder(n8 ofst = 0) -> void; + auto dot(n9 hpos, n9 color) -> void; auto power(bool reset) -> void; //serialization.cpp @@ -401,6 +410,7 @@ struct VDP : Thread { } test; u32* pixels = nullptr; + u32* active = nullptr; } dac; //color.cpp @@ -452,6 +462,11 @@ struct VDP : Thread { //serialization.cpp auto serialize(serializer&) -> void; + struct Bus { + n1 active; + n9 data; + } bus; + n9 memory[64]; } cram; @@ -502,6 +517,7 @@ struct VDP : Thread { //per-scanline n1 displayWidth; n1 clockSelect; + n1 displayEnable; } latch; struct State { @@ -512,12 +528,11 @@ struct VDP : Thread { n1 hblank; n1 vblank; n1 refreshing; + n1 rambusy; + n8 edclkPos; + n9 topline; + n9 bottomline; } state; - -//unserialized: - u8 cyclesH32[2][342], halvesH32[2][171], extrasH32[2][171]; - u8 cyclesH40[2][420], halvesH40[2][210], extrasH40[2][210]; - u8* cycles = nullptr; }; extern VDP vdp;