//! Microkernel OS experiment (Zig ⚡) — amd64 bring-up via the Limine boot protocol.
1const limine = @import("limine");
2const std = @import("std");
3const arch = @import("root.zig");
4const common = @import("common");
5const console = @import("console");
6const log = std.log.scoped(.amd64_init);
7const Idt = arch.structures.Idt;
8const StandardGdt = arch.structures.gdt.StandardGdt;
9const Tss = arch.structures.tss.Tss;
10
/// Limine boot-protocol request objects. Each `export var` is placed in the
/// `.limine_reqs` section (bracketed by the start/end markers) so the
/// bootloader can find and fill in the matching `response` pointers before
/// the kernel entry point runs.
pub const limine_requests = struct {
    // Section markers delimiting the request area for the bootloader scan.
    export var start_marker: limine.RequestsStartMarker linksection(".limine_reqs_start") = .{};
    export var end_marker: limine.RequestsEndMarker linksection(".limine_reqs_end") = .{};

    // Protocol revision this kernel was built against; checked in bsp_init.
    pub export var base_revision: limine.BaseRevision linksection(".limine_reqs") = .{ .revision = 3 };
    pub export var framebuffer: limine.FramebufferRequest linksection(".limine_reqs") = .{};
    pub export var hhdm: limine.HhdmRequest linksection(".limine_reqs") = .{};
    pub export var memmap: limine.MemoryMapRequest linksection(".limine_reqs") = .{};
    pub export var rsdp_req: limine.RsdpRequest linksection(".limine_reqs") = .{};
    pub export var dtb_req: limine.DtbRequest linksection(".limine_reqs") = .{};
    // Boot modules: module[0] is expected to be the root task image.
    pub export var modules: limine.ModuleRequest linksection(".limine_reqs") = .{};
    // Ask the bootloader to bring up APs with x2APIC mode if available.
    pub export var mp: limine.SmpMpFeature.MpRequest linksection(".limine_reqs") = .{ .flags = .{ .x2apic = true } };
};
24
/// Bootstrap-processor entry point, jumped to by Limine. Validates the boot
/// responses, initializes memory management, per-cpu structures, interrupts,
/// and the APIC, then loads the root task and drops to userspace. Never
/// returns; on any unrecoverable condition it halts via `die()` or panics.
pub fn bsp_init() callconv(.c) noreturn {
    // Don't optimize away the limine requests
    inline for (@typeInfo(limine_requests).@"struct".decls) |decl| {
        std.mem.doNotOptimizeAway(&@field(limine_requests, decl.name));
    }

    // If the base revision isn't supported, we can't boot
    if (!limine_requests.base_revision.isSupported()) {
        @branchHint(.cold);
        arch.instructions.die();
    }

    // Die if we don't have a memory map or Higher Half Direct Mapping
    if (limine_requests.memmap.response == null) {
        @branchHint(.cold);
        arch.instructions.die();
    }

    if (limine_requests.hhdm.response == null) {
        @branchHint(.cold);
        arch.instructions.die();
    }
    const hhdm_offset = limine_requests.hhdm.response.?.offset;
    common.init_data.hhdm_slide = hhdm_offset;

    // Add in a framebuffer if found
    initConsole();

    // Add in ACPI/dtb if found, prefer ACPI
    initHwDesc();

    // Set up the temporary Physical Memory Allocator
    common.mm.bootmem.init();

    // Attach the root task. Previously a response with zero modules slipped
    // through silently and left `root_task` unset; now both "no response"
    // and "empty module list" panic.
    const module_response = limine_requests.modules.response orelse {
        @branchHint(.unlikely);
        @panic("No root task found!");
    };
    if (module_response.module_count == 0) {
        @branchHint(.unlikely);
        @panic("No root task found!");
    }
    const mod = module_response.modules.?[0];
    // Module files are page-aligned by the protocol; @alignCast asserts that
    // when raising the pointer alignment to 4096.
    const mod_addr: [*]align(4096) u8 = @alignCast(@ptrCast(mod.address));
    const mod_size = mod.size;
    log.info("Loading root task with {s} @ {*}", .{ mod.path, mod.address });
    common.init_data.root_task = mod_addr[0..mod_size];

    // Initialize per-cpu data (GDT and TSS)
    arch.per_cpu_init_data.init(limine_requests.mp.response.?.cpu_count);

    // Install the IDT
    initIdt();

    // Set up our own GDT and TSS (BSP uses slot 0)
    const gdt = &arch.per_cpu_init_data.gdt_buf[0];
    gdt.* = .{};
    const tss = &arch.per_cpu_init_data.tss_buf[0];
    // TSS rsp 0x3800
    tss.* = .{
        .rsp0 = 0x3800,
        .rsp1 = 0x3800,
        .rsp2 = 0x3800,
    };

    gdt.tss_desc.set_tss_addr(tss);
    gdt.load();
    log.info("BSP successfully setup GDT+TSS!", .{});

    // AP bootstrap
    bootstrapAPs();

    // Calibrate our TSC
    arch.tsc.calibrate_pit() catch {
        log.info("Failed to calibrate with PIT!", .{});
        arch.instructions.die();
    };
    log.info("TSC estimate: {} MHz", .{arch.tsc.tsc_khz / 1000});

    log.info("Setting up scheduling...", .{});

    initApic() catch |err| {
        log.err("Failed to set up APIC! {}", .{err});
        @panic("apic");
    };

    log.info("Allocating code for userspace...", .{});

    // Allocate a stack (0x3000 - 0x4000)
    common.mm.paging.map(.{
        .vaddr = 0x3000,
        .size = 0x1000,
        .memory_type = .MemoryWriteBack,
        .perms = .{
            .executable = false,
            .userspace_accessible = true,
            .writable = true,
        },
    }) catch @panic("couldn't map user stack");

    const entry = common.loadRootTask() catch |err| {
        log.err("Couldn't load the root task! {}", .{err});
        @panic("ggz");
    };
    log.info("Dropping to userspace entry 0x{x:0>16}", .{entry});

    init_syscalls();

    // Stack top is 0x4000 (stack grows down into the page mapped above).
    enter_userspace(entry, 0x69, 0x4000);
}
135
136// Get ready for system calls (set MSRs)
137fn init_syscalls() void {
138 // Set up the STAR MSR with the segment descriptors
139 const IA32_STAR = arch.registers.MSR(u64, 0xC0000081);
140 const star_value: u64 = 0 | @as(u64, arch.structures.gdt.StandardGdt.selectors.kernel_code) << 32 | (@as(u64, arch.structures.gdt.StandardGdt.selectors.tss_desc + 8) | 3) << 48;
141 IA32_STAR.write(star_value);
142
143 // Set up the EFER MSR with SCE (System Call Enable)
144 const IA32_EFER = arch.registers.MSR(u64, 0xC0000080);
145 const efer_val = IA32_EFER.read() | 0b1;
146 IA32_EFER.write(efer_val);
147
148 // Set up LSTAR with the syscall handler and FMASK to clear interrupts
149 const IA32_LSTAR = arch.registers.MSR(u64, 0xC0000082);
150 IA32_LSTAR.write(@intFromPtr(syscall_entry));
151
152 const IA32_FMASK = arch.registers.MSR(u64, 0xC0000084);
153 IA32_FMASK.write(1 << 9);
154}
155
// Assembly syscall trampoline, defined externally; its address goes into
// IA32_LSTAR in init_syscalls().
const syscall_entry = @extern(*anyopaque, .{
    .name = "syscall_entry",
});
// C-ABI handler the trampoline calls; currently just logs the first two
// syscall argument registers.
export fn syscall_handler(rdi: usize, rsi: usize) callconv(.c) void {
    std.log.info("Got a syscall! rdi=0x{x}, rsi=0x{x}", .{ rdi, rsi });
}
162
/// Switch to ring 3 via iretq: builds the 5-word interrupt return frame
/// (SS, RSP, RFLAGS, CS, RIP), loads the user data selector into DS/ES,
/// zeroes every GPR except RDI (which carries `arg`), and returns to `entry`.
/// `stack` is the initial user RSP. Never returns.
fn enter_userspace(entry: u64, arg: u64, stack: u64) noreturn {
    log.info("usercode64 GDT 0x{x}, userdata64 GDT 0x{x}", .{ arch.structures.gdt.StandardGdt.selectors.user_code, arch.structures.gdt.StandardGdt.selectors.user_data });
    // Rewrite CR3 with its own value — on x86-64 this flushes non-global TLB
    // entries so the freshly created user mappings are visible.
    const cr3 = arch.registers.ControlRegisters.Cr3.read();
    arch.registers.ControlRegisters.Cr3.write(cr3);
    // Frame pushed in iretq pop order: SS, RSP, RFLAGS (0x202 = IF set),
    // CS, RIP. The xor block scrubs kernel values from the GPRs before the
    // privilege transition; rdi is deliberately left holding `arg`.
    asm volatile (
        \\ push %[userdata64]
        \\ push %[stack]
        \\ push $0x202
        \\ push %[usercode64]
        \\ push %[entry]
        \\
        \\ mov %[userdata64], %%rax
        \\ mov %%rax, %%es
        \\ mov %%rax, %%ds
        \\
        \\ xor %%rsi, %%rsi
        \\ xor %%rax, %%rax
        \\ xor %%rdx, %%rdx
        \\ xor %%rcx, %%rcx
        \\ xor %%rbp, %%rbp
        \\ xor %%rbx, %%rbx
        \\
        \\ xor %%r8, %%r8
        \\ xor %%r9, %%r9
        \\ xor %%r10, %%r10
        \\ xor %%r11, %%r11
        \\ xor %%r12, %%r12
        \\ xor %%r13, %%r13
        \\ xor %%r14, %%r14
        \\ xor %%r15, %%r15
        \\
        \\ iretq
        \\
        :
        : [arg] "{rdi}" (arg),
          [stack] "r" (stack),
          [entry] "r" (entry),
          [userdata64] "i" (arch.structures.gdt.StandardGdt.selectors.user_data),
          [usercode64] "i" (arch.structures.gdt.StandardGdt.selectors.user_code),
    );
    unreachable;
}
205
/// Select and initialize the local APIC: prefer x2APIC when the bootloader
/// enabled it, otherwise map the legacy xAPIC MMIO window, then program the
/// spurious vector/TPR, calibrate the timer, and start periodic ticks.
/// Returns an error if the xAPIC page cannot be mapped.
fn initApic() !void {
    if (limine_requests.mp.response.?.flags.x2apic) {
        arch.interrupts.apic.singleton = .x2apic;
    } else {
        // xAPIC: the register window lives at a fixed physical address and
        // must be mapped uncacheable before any access.
        const apic_phys = 0xFEE0_0000;
        const apic_base = common.mm.physToHHDM([*]volatile u8, apic_phys);
        try common.mm.paging.mapPhys(.{
            .vaddr = @intFromPtr(apic_base),
            .paddr = apic_phys,
            .size = 0x1000,
            .memory_type = .DeviceUncacheable,
            .perms = .{
                .executable = false,
                .userspace_accessible = false,
                .writable = true,
            },
        });
        arch.interrupts.apic.singleton = .{ .xapic = apic_base };
    }

    // Set up the spurious vector and the TPR
    arch.interrupts.apic.init.initialSetup();

    // Calibrate the APIC timer
    arch.interrupts.apic.init.calibrateTimer();

    // Enable periodic interrupts
    arch.interrupts.apic.init.enablePeriodicInterrupt(1000);
}
236
/// Hook up the boot console if the bootloader reported a framebuffer.
/// No-op (logging stays unavailable) when none is present.
fn initConsole() void {
    const fb_response = limine_requests.framebuffer.response orelse return;
    if (fb_response.framebuffer_count == 0) return;

    const fb = console.Framebuffer.from_limine(fb_response.getFramebuffers()[0]);
    common.init_data.framebuffer = fb;
    // At this point, log becomes usable
    common.init_data.console = console.Console.from_font(fb, console.DefaultFont);
    common.init_data.console.?.setColor(0x3bcf1d, 0);
}
248
/// Record the platform hardware description. ACPI (RSDP) is preferred; a
/// device tree is used only when no RSDP was reported. Leaves the field
/// untouched when the bootloader provided neither.
fn initHwDesc() void {
    if (limine_requests.rsdp_req.response) |rsdp_response| {
        common.init_data.hardware_description = .{ .acpi_rsdp = rsdp_response.address };
    } else if (limine_requests.dtb_req.response) |dtb_response| {
        common.init_data.hardware_description = .{ .dtb = dtb_response.dtb_ptr };
    }
}
257
/// Populate the shared IDT with the known exception/interrupt handlers and
/// load it on the current cpu via lidt.
pub fn initIdt() void {
    const idt = arch.per_cpu_init_data.idt;

    // Exception handlers.
    idt.breakpoint.installHandler(breakpoint_handler);
    idt.double_fault.installHandler(double_fault);
    idt.general_protection_fault.installHandler(gpf);
    idt.page_fault.installHandler(page_fault);
    // APIC vectors: 0xFF = spurious, 48 = periodic timer. The `interrupts`
    // array starts at vector 32, hence the offset.
    idt.interrupts[0xFF - 32].installHandler(arch.interrupts.apic.spurious_interrupt_handler);
    idt.interrupts[48 - 32].installHandler(arch.interrupts.apic.periodic_handler);

    // Load the Idt Register
    const reg: Idt.Idtr = .{
        .addr = @intFromPtr(idt),
        .limit = @sizeOf(Idt) - 1,
    };
    reg.load();
}
273
274// TODO: update the type reflection thing to make a custom
275// function type for the ISR
// TODO: update the type reflection thing to make a custom
// function type for the ISR
/// Decoded #PF error code, matching the hardware bit layout (bits 0-6 flags,
/// bit 15 SGX, remaining bits reserved). Field order is the bit order — do
/// not reorder.
pub const PageFaultErrorCode = packed struct(u64) {
    present: bool, // bit 0: fault on a present page (protection) vs. non-present
    write: bool, // bit 1: access was a write (else a read)
    user: bool, // bit 2: fault occurred in user mode
    reserved_write: bool, // bit 3: reserved bit set in a paging structure
    instruction_fetch: bool, // bit 4: instruction fetch fault
    protection_key: bool, // bit 5: protection-key violation
    shadow_stack: bool, // bit 6: shadow-stack access fault
    _reserved: u8, // bits 7-14
    sgx: bool, // bit 15: SGX-related fault
    _reserved2: u48, // bits 16-63

    /// Raw u64 view of the error code, for logging.
    pub fn val(self: *const PageFaultErrorCode) u64 {
        return @bitCast(self.*);
    }
};
/// #PF handler: logs the faulting RIP, the decoded error code, and the
/// faulting linear address from CR2, then halts. Does not attempt recovery.
pub fn page_fault(stack_frame: *arch.structures.Idt.InterruptStackFrame, err_code_u64: u64) callconv(.{ .x86_64_interrupt = .{} }) void {
    const err_code: PageFaultErrorCode = @bitCast(err_code_u64);
    log.err("PAGE FAULT @ 0x{x:0>16}, code 0x{x}!!!!!!!!!!!", .{ stack_frame.instruction_pointer, err_code.val() });
    // CR2 holds the linear address that triggered the fault.
    const faulting_vaddr = arch.registers.ControlRegisters.Cr2.read();
    if (err_code.write) {
        log.err("Tried to write to vaddr 0x{x:0>16}", .{faulting_vaddr});
    } else {
        log.err("Tried to read from vaddr 0x{x:0>16}", .{faulting_vaddr});
    }
    log.err("dying...", .{});
    arch.instructions.die();
}
303
/// #BP handler: logs the breakpoint address and resumes execution at the
/// instruction after the int3.
pub fn breakpoint_handler(stack_frame: *Idt.InterruptStackFrame) callconv(.{ .x86_64_interrupt = .{} }) void {
    log.warn("Breakpoint @ 0x{x:0>16}, returning execution...", .{stack_frame.instruction_pointer});
}
307
/// #GP handler: logs the faulting RIP and error code, then halts.
/// The old log line claimed "returning execution..." while the handler in
/// fact calls die(); the message now matches the behavior, and uses
/// log.err like the other fatal handlers.
pub fn gpf(stack_frame: *Idt.InterruptStackFrame, err_code: u64) callconv(.{ .x86_64_interrupt = .{} }) void {
    log.err("gpf @ 0x{x:0>16} ERR CODE {}, halting...", .{ stack_frame.instruction_pointer, err_code });
    arch.instructions.die();
}
312
/// #DF handler: paints the console red (if one exists), logs the fault, and
/// halts. Fixes a crash-in-the-crash-handler: `console.?` panicked when no
/// framebuffer/console was set up (see initConsole), and panicking inside
/// the double-fault handler escalates to a triple fault.
pub fn double_fault(stack_frame: *Idt.InterruptStackFrame, err_code: u64) callconv(.{ .x86_64_interrupt = .{} }) noreturn {
    if (common.init_data.console) |*con| con.setColor(0xf40d17, 0);
    log.err("FATAL DOUBLE FAULT @ 0x{x:0>16}, code 0x{x}!!!!!!!!!!!", .{ stack_frame.instruction_pointer, err_code });
    log.err("dying...", .{});
    arch.instructions.die();
}
319
/// Release the parked application processors: writing goto_address makes
/// each AP jump to ap_init.
fn bootstrapAPs() void {
    log.info("Bootstrapping APs...", .{});
    // NOTE(review): this loop also writes the BSP's own entry; the Limine MP
    // protocol appears to ignore goto_address for the BSP — confirm against
    // the spec, otherwise skip the cpu whose lapic_id matches the BSP.
    for (limine_requests.mp.response.?.getCpus()) |cpu| {
        cpu.goto_address = ap_init;
    }
}
327
/// Application-processor entry point (jumped to via goto_address): loads the
/// shared IDT, installs this cpu's GDT/TSS from the per-cpu buffers, then
/// parks the core. Never returns.
fn ap_init(mp_info: *limine.SmpMpFeature.MpInfo) callconv(.c) noreturn {
    const cpu_id = mp_info.processor_id;

    // The IDT is shared between cores; each core just needs its own lidt.
    const reg: Idt.Idtr = .{
        .addr = @intFromPtr(arch.per_cpu_init_data.idt),
        .limit = @sizeOf(Idt) - 1,
    };
    reg.load();

    // Per-cpu GDT and TSS, indexed by processor id.
    const gdt = &arch.per_cpu_init_data.gdt_buf[cpu_id];
    const tss = &arch.per_cpu_init_data.tss_buf[cpu_id];
    gdt.* = .{};
    tss.* = .{};
    gdt.tss_desc.set_tss_addr(tss);
    gdt.load();

    log.info("CPU {}: setup GDT and TSS, killing myself rn...", .{cpu_id});

    arch.instructions.die();
}