-
Notifications
You must be signed in to change notification settings - Fork 58
/
Copy pathk-ahci.cc
360 lines (300 loc) · 11.9 KB
/
k-ahci.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
#include "k-ahci.hh"
#include "k-apic.hh"
#include "k-pci.hh"
// k-ahci.cc
//
// Functions for interacting with advanced SATA disks.
// HELPER FUNCTIONS FOR PREPARING, ISSUING, AND ACKNOWLEDGING COMMANDS
// ahcistate::clear(slot)
// Prepare `slot` to receive a command.
inline void ahcistate::clear(int slot) {
assert(unsigned(slot) < unsigned(nslots_));
dma_.ch[slot].nbuf = 0;
dma_.ch[slot].buf_byte_pos = 0;
}
// ahcistate::push_buffer(slot, buf, sz)
// Append a data buffer to the buffers relevant for the next command.
void ahcistate::push_buffer(int slot, void* buf, size_t sz) {
// check requirements on address and size
uint64_t pa = kptr2pa(buf);
assert((pa & 1) == 0 && (sz & 1) == 0); // word-aligned
assert(sz > 0 && sz <= (64U << 10)); // >0, <64KiB
assert(pa <= 0x100000000UL); // low physical memory
assert((pa ^ (pa + sz - 1)) < (64U << 10)); // within aligned 64KiB
// check slot availability
assert(unsigned(slot) < unsigned(nslots_));
assert(dma_.ch[slot].nbuf < arraysize(dma_.ct[slot].buf));
int nbuf = dma_.ch[slot].nbuf;
dma_.ct[slot].buf[nbuf].pa = pa;
dma_.ct[slot].buf[nbuf].maxbyte = sz - 1;
dma_.ch[slot].nbuf = nbuf + 1;
dma_.ch[slot].buf_byte_pos += sz;
}
// ahcistate::issue_ncq(slot, cmd, sector, fua = false, priority = 0)
// Issue an NCQ command (`cmd_read/write_fpdma_queued`) to the disk.
// Must be preceded by `clear(slot); push_buffer(slot...)*`.
// `fua`: If true, then do not acknowledge the write until data has
// been durably written to the disk.
// `priority`: 0 is normal; 2 means high priority.
void ahcistate::issue_ncq(int slot, idecommand cmd, size_t sector,
bool fua, int priority) {
assert(unsigned(slot) < unsigned(nslots_));
assert(!(slots_outstanding_mask_ & (1U << slot)));
assert(cmd == cmd_read_fpdma_queued || cmd == cmd_write_fpdma_queued);
assert(unsigned(priority) < 3);
assert(dma_.ch[slot].buf_byte_pos > 0);
size_t nsectors = dma_.ch[slot].buf_byte_pos / sectorsize;
dma_.ct[slot].cfis[0] = cfis_command | (unsigned(cmd) << 16)
| ((nsectors & 0xFF) << 24);
dma_.ct[slot].cfis[1] = (sector & 0xFFFFFF)
| (uint32_t(fua) << 31) | 0x40000000U;
dma_.ct[slot].cfis[2] = (sector >> 24) | ((nsectors & 0xFF00) << 16);
dma_.ct[slot].cfis[3] = (slot << 3) | (priority << 14);
dma_.ch[slot].flags = 4 /* # words in `cfis` */
| ch_clear_flag
| (cmd == cmd_write_fpdma_queued ? ch_write_flag : 0);
dma_.ch[slot].buf_byte_pos = 0;
// ensure all previous writes have made it out to memory
std::atomic_thread_fence(std::memory_order_release);
pr_->ncq_active_mask = 1U << slot; // tell interface NCQ slot used
pr_->command_mask = 1U << slot; // tell interface command available
// The write to `command_mask` wakes up the device.
slots_outstanding_mask_ |= 1U << slot; // remember slot
--nslots_available_;
}
// ahcistate::acknowledge(slot, result)
// Acknowledge a waiting command in `slot`.
inline void ahcistate::acknowledge(int slot, int result) {
assert(unsigned(slot) < unsigned(nslots_));
assert(slots_outstanding_mask_ & (1U << slot));
slots_outstanding_mask_ ^= 1U << slot;
++nslots_available_;
if (slot_status_[slot]) {
*slot_status_[slot] = result;
slot_status_[slot] = nullptr;
}
}
// FUNCTIONS FOR READING AND WRITING BLOCKS
// ahcistate::read_or_write(command, buf, sz, off)
// Issue an NCQ read or write command `command`. Read or write
// `sz` bytes of data to or from `buf`, starting at disk offset
// `off`. `sz` and `off` are measured in bytes, but must be
// sector-aligned (i.e., multiples of `ahcistate::sectorsize`).
// Can block. Returns 0 on success and an error code on failure.
int ahcistate::read_or_write(idecommand command, void* buf, size_t sz,
size_t off) {
// `sz` and `off` must be sector-aligned
assert(sz % sectorsize == 0 && off % sectorsize == 0);
// obtain lock
auto irqs = lock_.lock();
// block until ready for command
waiter().block_until(wq_, [&] () {
return !slots_outstanding_mask_;
}, lock_, irqs);
// send command, record buffer and status storage
std::atomic<int> r = E_AGAIN;
clear(0);
push_buffer(0, buf, sz);
issue_ncq(0, command, off / sectorsize);
slot_status_[0] = &r;
lock_.unlock(irqs);
// wait for response
waiter().block_until(wq_, [&] () {
return r != E_AGAIN;
});
return r;
}
// FUNCTIONS FOR HANDLING INTERRUPTS
void ahcistate::handle_interrupt() {
// obtain lock, read data
auto irqs = lock_.lock();
// check interrupt reason, clear interrupt
bool is_error = (pr_->interrupt_status & interrupt_fatal_error_mask)
|| (pr_->rstatus & rstatus_error);
pr_->interrupt_status = ~0U;
dr_->interrupt_status = ~0U;
// acknowledge completed commands
uint32_t acks = slots_outstanding_mask_ & ~pr_->ncq_active_mask;
for (int slot = 0; acks != 0; ++slot, acks >>= 1) {
if (acks & 1) {
acknowledge(slot, 0);
}
}
// acknowledge errored commands
if (is_error) {
handle_error_interrupt();
}
lock_.unlock(irqs);
// wake waiters
lapicstate::get().ack();
wq_.wake_all();
}
void ahcistate::handle_error_interrupt() {
for (int slot = 0; slot < 32; ++slot) {
if (slots_outstanding_mask_ & (1U << slot)) {
acknowledge(slot, E_IO);
}
}
// SATA AHCI 1.3.1 §6.2.2.2
pr_->command = pr_->command & ~pcmd_start;
while (pr_->command & pcmd_command_running) {
pause();
}
pr_->serror = ~0U;
pr_->command = pr_->command | pcmd_start;
// XXX must `READ LOG EXT` to clear error
panic("SATA disk error");
}
// INITIALIZATION FUNCTIONS
void ahcistate::issue_meta(int slot, idecommand cmd, int features,
int count) {
assert(unsigned(slot) < unsigned(nslots_));
assert(unsigned(features) < 0x10000);
assert(!(slots_outstanding_mask_ & (1U << slot)));
assert(cmd == cmd_identify_device || cmd == cmd_set_features);
size_t nsectors = dma_.ch[slot].buf_byte_pos / sectorsize;
if (cmd == cmd_set_features && count != -1) {
nsectors = count;
}
dma_.ct[slot].cfis[0] = cfis_command | (unsigned(cmd) << 16)
| (unsigned(features) << 24);
dma_.ct[slot].cfis[1] = 0;
dma_.ct[slot].cfis[2] = (unsigned(features) & 0xFF00) << 16;
dma_.ct[slot].cfis[3] = nsectors;
dma_.ch[slot].flags = 4 /* # words in `cfis` */ | ch_clear_flag;
dma_.ch[slot].buf_byte_pos = 0;
// ensure all previous writes have made it out to memory
std::atomic_thread_fence(std::memory_order_release);
pr_->command_mask = 1U << slot; // tell interface command is available
slots_outstanding_mask_ |= 1U << slot;
--nslots_available_;
}
void ahcistate::await_basic(int slot) {
assert(unsigned(slot) < unsigned(nslots_));
while (pr_->command_mask & (1U << slot)) {
pause();
}
acknowledge(slot, 0);
}
ahcistate::ahcistate(int pci_addr, int sata_port, volatile regs* dr)
: pci_addr_(pci_addr), sata_port_(sata_port),
dr_(dr), pr_(&dr->p[sata_port]), nslots_(1),
nslots_available_(1), slots_outstanding_mask_(0) {
for (int i = 0; i < 32; ++i) {
slot_status_[i] = nullptr;
}
auto& pci = pcistate::get();
pci.enable(pci_addr_);
// place port in idle state
pr_->command = pr_->command & ~uint32_t(pcmd_rfis_enable | pcmd_start);
while (pr_->command & (pcmd_command_running | pcmd_rfis_running)) {
pause();
}
// set up DMA area
memset(&dma_, 0, sizeof(dma_));
for (int i = 0; i != 32; ++i) {
dma_.ch[i].cmdtable_pa = ka2pa(&dma_.ct[i]);
}
pr_->cmdlist_pa = ka2pa(&dma_.ch[0]);
pr_->rfis_pa = ka2pa(&dma_.rfis);
// clear errors and power up
pr_->serror = ~0U;
pr_->command = pr_->command | pcmd_power_up;
// configure interrupts
pr_->interrupt_status = ~0U; // clear pending interrupts
dr_->interrupt_status = ~0U;
pr_->interrupt_enable = interrupt_device_to_host
| interrupt_ncq_complete
| interrupt_error_mask;
dr_->ghc = dr_->ghc | ghc_interrupt_enable;
// wait for functional device
pr_->command = pr_->command | pcmd_rfis_enable;
while ((pr_->rstatus & (rstatus_busy | rstatus_datareq)) != 0
|| !sstatus_active(pr_->sstatus)) {
pause();
}
// wake up port
pr_->command = (pr_->command & ~pcmd_interface_mask)
| pcmd_interface_active;
while ((pr_->command & pcmd_interface_mask) != pcmd_interface_idle) {
pause();
}
// NB QEMU does not implement PxCMD.CLO/pcmd_rfis_clear
pr_->command = pr_->command | pcmd_start;
// send IDENTIFY DEVICE command and check results
uint16_t devid[256];
memset(devid, 0, sizeof(devid));
clear(0);
push_buffer(0, devid, sizeof(devid));
issue_meta(0, cmd_identify_device, 0);
await_basic(0);
assert(!(devid[0] & 0x0004)); // response complete
assert((devid[53] & 0x0006) == 0x0006); // all words set
// valid SATA device
assert((devid[49] & 0x0300) == 0x0300); // LBA + DMA
assert(devid[76] != 0 && devid[76] != 0xFFFF); // Serial ATA device
assert(devid[83] & 0x0400); // 48-bit addresses
assert(!(devid[106] & 0x1000)); // 512-byte logical sectors
assert(devid[76] & 0x0100); // NCQ supported
// fast transfer modes
assert(devid[64] & 0x0002); // PIO mode 4 supported
assert((devid[88] & 0x2020) == 0x2020 // Ultra DMA 5 or 6 selected
|| (devid[88] & 0x4040) == 0x4040);
// count sectors
nsectors_ = devid[100] | (devid[101] << 16) | (uint64_t(devid[102]) << 32)
| (uint64_t(devid[103]) << 48);
assert(nsectors_ < 0x1000000000000UL);
// count slots
assert(dr_->capabilities & 0x40000000U); // NCQ supported
nslots_ = ((dr_->capabilities >> 8) & 0x1F) + 1; // slots per controller
if ((devid[75] & 0x1F) + 1U < nslots_) { // slots per disk
nslots_ = (devid[75] & 0x1F) + 1;
}
slots_full_mask_ = (nslots_ == 32 ? 0xFFFFFFFFU : (1U << nslots_) - 1);
nslots_available_ = nslots_;
// set features
clear(0);
issue_meta(0, cmd_set_features, 0x02); // write cache enable
await_basic(0);
clear(0);
issue_meta(0, cmd_set_features, 0xAA); // read lookahead enable
await_basic(0);
// determine IRQ
int intr_pin = ((pci.readl(pci_addr + 0x3C) >> 8) & 0xFF) - 1;
irq_ = machine_pci_irq(pci_addr_, intr_pin);
// finally, clear pending interrupts again
pr_->interrupt_status = ~0U;
dr_->interrupt_status = ~0U;
}
// ahcistate::find(addr, port)
// Find and initialize an SATA disk. Return a pointer to the `ahcistate`,
// or `nullptr` if the search fails. The search begins at PCI address
// `addr` and SATA port number `port`.
ahcistate* ahcistate::find(int addr, int port) {
auto& pci = pcistate::get();
for (; addr >= 0; addr = pci.next(addr), port = 0) {
if (pci.readw(addr + pci.config_subclass) != 0x0106) {
continue;
}
uint32_t pa = pci.readl(addr + pci.config_bar5);
if (pa == 0) {
continue;
}
auto dr = pa2kptr<volatile regs*>(pa);
// "Indicate that system software is AHCI aware"
if (!(dr->ghc & ghc_ahci_enable)) {
dr->ghc = ghc_ahci_enable;
}
for (; port < 32; ++port) {
if ((dr->port_mask & (1U << port))
&& dr->p[port].sstatus) {
return knew<ahcistate>(addr, port, dr);
}
}
}
return nullptr;
}
static_assert(sizeof(ahcistate::portregs) == 128,
"ahcistate::portregs size");
static_assert(sizeof(ahcistate::regs) == 256 + 32 * 128,
"ahcistate::regs size");