diff --git a/.gitignore b/.gitignore index 18b6da13..65b77196 100644 --- a/.gitignore +++ b/.gitignore @@ -77,3 +77,6 @@ ovmf/ **/target/* scripts/__pycache__ + +# Third-party sources and build artifacts (cloned/built locally) +third-party/ diff --git a/kernel/src/arch_impl/aarch64/context_switch.rs b/kernel/src/arch_impl/aarch64/context_switch.rs index f2cf9958..19c939cd 100644 --- a/kernel/src/arch_impl/aarch64/context_switch.rs +++ b/kernel/src/arch_impl/aarch64/context_switch.rs @@ -1288,7 +1288,22 @@ fn set_next_ttbr0_for_thread(thread_id: u64) -> TtbrResult { .map(|pt| pt.level_4_frame().start_address().as_u64()) .or(process.inherited_cr3) } else { - // Thread's process not found — orphaned thread + // Thread's process not found — orphaned thread. + // Diagnostic: dump all process thread IDs to identify the mismatch. + raw_uart_str("\n[TTBR_DIAG] wanted_tid="); + raw_uart_dec(thread_id); + raw_uart_str(" nproc="); + raw_uart_dec(manager.process_count() as u64); + for (pid, proc) in manager.iter_processes() { + raw_uart_str(" p"); + raw_uart_dec(pid.as_u64()); + raw_uart_str(":t"); + match proc.main_thread.as_ref() { + Some(t) => raw_uart_dec(t.id), + None => raw_uart_str("X"), + } + } + raw_uart_str("\n"); drop(manager_guard); return TtbrResult::ProcessGone; } diff --git a/kernel/src/arch_impl/aarch64/elf.rs b/kernel/src/arch_impl/aarch64/elf.rs index 9ead643f..3a724b3b 100644 --- a/kernel/src/arch_impl/aarch64/elf.rs +++ b/kernel/src/arch_impl/aarch64/elf.rs @@ -88,6 +88,12 @@ pub struct LoadedElf { pub segments_end: u64, /// Lowest loaded address pub load_base: u64, + /// Virtual address of program headers (from PT_PHDR or load_base + phoff) + pub phdr_vaddr: u64, + /// Number of program headers + pub phnum: u16, + /// Size of each program header entry + pub phentsize: u16, } /// Validate an ELF header for ARM64 @@ -146,6 +152,7 @@ pub unsafe fn load_elf_kernel_space(data: &[u8]) -> Result = None; // Process program headers let ph_offset = header.phoff as 
usize; @@ -160,6 +167,11 @@ pub unsafe fn load_elf_kernel_space(data: &[u8]) -> Result Result Result = None; // Process program headers let ph_offset = header.phoff as usize; @@ -306,6 +326,11 @@ pub fn load_elf_into_page_table( ph_bytes.copy_from_slice(&data[ph_start..ph_start + mem::size_of::()]); let ph: &Elf64ProgramHeader = unsafe { &*(ph_bytes.as_ptr() as *const Elf64ProgramHeader) }; + // Check for PT_PHDR segment + if ph.p_type == SegmentType::Phdr as u32 { + phdr_vaddr = Some(ph.p_vaddr); + } + if ph.p_type == SegmentType::Load as u32 { load_segment_into_page_table(data, ph, page_table)?; @@ -323,9 +348,13 @@ pub fn load_elf_into_page_table( // Page-align the heap start (4KB alignment) let heap_start = (max_segment_end + 0xfff) & !0xfff; + let load_base = if min_load_addr == u64::MAX { 0 } else { min_load_addr }; + // If no PT_PHDR was found, compute from load_base + phoff + let phdr_vaddr = phdr_vaddr.unwrap_or(load_base + header.phoff); + log::debug!( "[elf-arm64] Loaded: base={:#x}, end={:#x}, entry={:#x}", - if min_load_addr == u64::MAX { 0 } else { min_load_addr }, + load_base, heap_start, header.entry ); @@ -333,7 +362,10 @@ pub fn load_elf_into_page_table( Ok(LoadedElf { entry_point: header.entry, segments_end: heap_start, - load_base: if min_load_addr == u64::MAX { 0 } else { min_load_addr }, + load_base, + phdr_vaddr, + phnum: header.phnum, + phentsize: header.phentsize, }) } diff --git a/kernel/src/arch_impl/aarch64/syscall_entry.rs b/kernel/src/arch_impl/aarch64/syscall_entry.rs index c8089354..a237da6f 100644 --- a/kernel/src/arch_impl/aarch64/syscall_entry.rs +++ b/kernel/src/arch_impl/aarch64/syscall_entry.rs @@ -113,38 +113,57 @@ pub extern "C" fn rust_syscall_handler_aarch64(frame: &mut Aarch64ExceptionFrame // Dispatch to syscall handler. // Some syscalls need special handling because they require access to the frame. - // These use SyscallNumber enum values to stay in sync with the shared enum. 
+ // Resolve the raw number to a SyscallNumber via from_u64(). use crate::syscall::SyscallNumber; - let result = if syscall_num == SyscallNumber::Fork as u64 { - sys_fork_aarch64(frame) - } else if syscall_num == SyscallNumber::Exec as u64 { - let exec_result = sys_exec_aarch64(frame, arg1, arg2); - // Trace: exec syscall handler returned to dispatcher - super::trace::trace_exec(b'H'); - exec_result - } else if syscall_num == SyscallNumber::Sigreturn as u64 { - // SIGRETURN restores ALL registers from signal frame - don't overwrite X0 after - match crate::syscall::signal::sys_sigreturn_with_frame_aarch64(frame) { - crate::syscall::SyscallResult::Ok(_) => { - // X0 was already restored from signal frame - don't overwrite it - check_and_deliver_signals_aarch64(frame); - Aarch64PerCpu::preempt_enable(); - return; + + let resolved_num = SyscallNumber::from_u64(syscall_num); + + let result = match resolved_num { + Some(SyscallNumber::Fork) => sys_fork_aarch64(frame), + Some(SyscallNumber::Exec) => { + let exec_result = sys_exec_aarch64(frame, arg1, arg2); + super::trace::trace_exec(b'H'); + exec_result + } + Some(SyscallNumber::Sigreturn) => { + // SIGRETURN restores ALL registers from signal frame - don't overwrite X0 after + match crate::syscall::signal::sys_sigreturn_with_frame_aarch64(frame) { + crate::syscall::SyscallResult::Ok(_) => { + check_and_deliver_signals_aarch64(frame); + Aarch64PerCpu::preempt_enable(); + return; + } + crate::syscall::SyscallResult::Err(errno) => (-(errno as i64)) as u64, } - crate::syscall::SyscallResult::Err(errno) => (-(errno as i64)) as u64, } - } else if syscall_num == SyscallNumber::Pause as u64 { - match crate::syscall::signal::sys_pause_with_frame_aarch64(frame) { - crate::syscall::SyscallResult::Ok(r) => r, - crate::syscall::SyscallResult::Err(e) => (-(e as i64)) as u64, + Some(SyscallNumber::Pause) => { + match crate::syscall::signal::sys_pause_with_frame_aarch64(frame) { + crate::syscall::SyscallResult::Ok(r) => r, + 
crate::syscall::SyscallResult::Err(e) => (-(e as i64)) as u64, + } } - } else if syscall_num == SyscallNumber::Sigsuspend as u64 { - match crate::syscall::signal::sys_sigsuspend_with_frame_aarch64(arg1, arg2, frame) { - crate::syscall::SyscallResult::Ok(r) => r, - crate::syscall::SyscallResult::Err(e) => (-(e as i64)) as u64, + Some(SyscallNumber::Sigsuspend) => { + match crate::syscall::signal::sys_sigsuspend_with_frame_aarch64(arg1, arg2, frame) { + crate::syscall::SyscallResult::Ok(r) => r, + crate::syscall::SyscallResult::Err(e) => (-(e as i64)) as u64, + } + } + Some(SyscallNumber::Clone) => { + // ARM64 has no fork syscall; libbreenix emulates fork via clone(SIGCHLD, 0, 0, 0, 0). + // Detect fork-style clone (no CLONE_VM) and route to sys_fork_aarch64 which + // needs the exception frame to snapshot parent registers. + const CLONE_VM: u64 = 0x00000100; + if arg1 & CLONE_VM == 0 { + sys_fork_aarch64(frame) + } else { + result_to_u64(crate::syscall::clone::sys_clone(arg1, arg2, arg3, arg4, arg5)) + } + } + Some(syscall) => dispatch_syscall_enum(syscall, arg1, arg2, arg3, arg4, arg5, arg6, frame), + None => { + crate::serial_println!("[syscall] Unknown ARM64 syscall {} - returning ENOSYS", syscall_num); + (-38_i64) as u64 // -ENOSYS } - } else { - dispatch_syscall(syscall_num, arg1, arg2, arg3, arg4, arg5, arg6, frame) }; // Set return value in X0 @@ -276,13 +295,6 @@ fn check_and_deliver_signals_aarch64(frame: &mut Aarch64ExceptionFrame) { // Syscall dispatch // ============================================================================= -// Linux ARM64 syscall number aliases (for compatibility with standard ARM64 binaries) -mod arm64_compat { - pub const EXIT: u64 = 93; - pub const EXIT_GROUP: u64 = 94; - pub const WRITE: u64 = 64; -} - /// Convert SyscallResult to raw u64 return value (positive or negative errno) #[inline] fn result_to_u64(result: crate::syscall::SyscallResult) -> u64 { @@ -297,28 +309,52 @@ fn result_to_u64(result: 
crate::syscall::SyscallResult) -> u64 { /// This is separate from the shared dispatcher because ARM64 needs to: /// 1. Use `wfi` (not `hlt`) when no more userspace threads remain /// 2. Inline the exit logic since `handlers::sys_exit` is x86_64-only +/// +/// CRITICAL: This function must NEVER return. After terminating the thread, +/// it enters a WFI loop. The timer interrupt will fire and context-switch +/// to another thread; the terminated thread will never be re-scheduled. +/// If this function returned, the userspace exit() caller (e.g., musl's +/// `for(;;) __syscall(SYS_exit, ec)` loop) would re-enter exit, causing +/// double-terminate and double-decrement of COW page refcounts. fn sys_exit_aarch64(exit_code: i32) -> u64 { - crate::serial_println!("[syscall] exit({})", exit_code); - if let Some(thread_id) = crate::task::scheduler::current_thread_id() { - // Handle clear_child_tid for clone threads (CLONE_CHILD_CLEARTID) - { + // Handle clear_child_tid for clone threads (CLONE_CHILD_CLEARTID). + // Extract info under PM lock, but do NOT log while holding it. 
+ let (pid_for_log, name_for_log, futex_info) = { let manager_guard = crate::process::manager(); if let Some(ref manager) = *manager_guard { if let Some((_pid, process)) = manager.find_process_by_thread(thread_id) { - if let Some(tid_addr) = process.clear_child_tid { - let tg_id = process.thread_group_id.unwrap_or(_pid.as_u64()); + let pid_val = _pid.as_u64(); + let name_val = process.name.clone(); + let futex = process.clear_child_tid.map(|tid_addr| { + let tg_id = process.thread_group_id.unwrap_or(pid_val); unsafe { let ptr = tid_addr as *mut u32; if !ptr.is_null() && tid_addr < 0x7FFF_FFFF_FFFF { core::ptr::write_volatile(ptr, 0); } } - drop(manager_guard); - crate::syscall::futex::futex_wake_for_thread_group(tg_id, tid_addr, u32::MAX); - } + (tg_id, tid_addr) + }); + (Some(pid_val), Some(name_val), futex) + } else { + (None, None, None) } + } else { + (None, None, None) } + }; // PM lock dropped here + + // Futex wake outside PM lock + if let Some((tg_id, tid_addr)) = futex_info { + crate::syscall::futex::futex_wake_for_thread_group(tg_id, tid_addr, u32::MAX); + } + + // Log outside PM lock + if let (Some(pid), Some(name)) = (pid_for_log, &name_for_log) { + crate::serial_println!("[syscall] exit({}) pid={} name={}", exit_code, pid, name); + } else { + crate::serial_println!("[syscall] exit({}) thread={}", exit_code, thread_id); } crate::task::process_task::ProcessScheduler::handle_thread_exit(thread_id, exit_code); @@ -340,26 +376,43 @@ fn sys_exit_aarch64(exit_code: i32) -> u64 { crate::serial_println!(" Exit code: {}", exit_code); crate::serial_println!("========================================"); crate::serial_println!(); - - loop { - unsafe { core::arch::asm!("wfi"); } - } } } - crate::task::scheduler::set_need_resched(); - 0 + // Re-enable preemption (balances the preempt_disable in rust_syscall_handler_aarch64) + // so timer interrupts can trigger context-switch to another thread. + Aarch64PerCpu::preempt_enable(); + + // NEVER return to userspace. 
The thread is terminated; wait for the timer + // interrupt to context-switch away. The scheduler will not re-schedule a + // terminated thread, so this loop runs at most until the next timer tick. + // + // CRITICAL: Must unmask IRQ before WFI. The syscall entry assembly masks IRQ + // (daifset #0x2) and we never return to the assembly epilogue (which would + // call check_need_resched_and_switch_arm64 and restore interrupt state via + // ERET). Without unmasking IRQ here, the timer interrupt is pending but never + // handled — this CPU becomes permanently stuck, unable to process deferred + // thread requeues or context-switch to other threads. + loop { + unsafe { + core::arch::asm!( + "msr daifclr, #2", // Unmask IRQ so timer interrupt can fire + "wfi", // Wait for interrupt — timer will context-switch us away + options(nomem, nostack) + ); + } + } } -/// Dispatch a syscall to the appropriate handler. +/// Dispatch a syscall to the appropriate handler using the resolved SyscallNumber. /// /// Uses the shared SyscallNumber enum to ensure new syscalls are automatically /// picked up by both architectures. Only EXIT requires arch-specific handling /// (wfi vs hlt). All other syscalls delegate to shared implementations. /// /// Returns the syscall result (positive for success, negative errno for error). 
-fn dispatch_syscall( - num: u64, +fn dispatch_syscall_enum( + syscall: crate::syscall::SyscallNumber, arg1: u64, arg2: u64, arg3: u64, @@ -370,22 +423,6 @@ fn dispatch_syscall( ) -> u64 { use crate::syscall::SyscallNumber; - // Handle Linux ARM64 compatibility numbers first (map to Breenix ABI) - let num = match num { - arm64_compat::EXIT | arm64_compat::EXIT_GROUP => return sys_exit_aarch64(arg1 as i32), - arm64_compat::WRITE => 1, // Map to Breenix WRITE - other => other, - }; - - // Look up in the shared SyscallNumber enum - let syscall = match SyscallNumber::from_u64(num) { - Some(s) => s, - None => { - crate::serial_println!("[syscall] Unknown ARM64 syscall {} - returning ENOSYS", num); - return (-38_i64) as u64; // -ENOSYS - } - }; - // Dispatch using the shared enum — adding a new SyscallNumber variant // without adding a match arm here will produce a compiler warning. match syscall { @@ -393,7 +430,7 @@ fn dispatch_syscall( SyscallNumber::Exit | SyscallNumber::ExitGroup => sys_exit_aarch64(arg1 as i32), // FORK, EXEC, SIGRETURN, PAUSE, SIGSUSPEND are handled before - // dispatch_syscall is called (they need frame access). + // dispatch_syscall_enum is called (they need frame access). // If they somehow reach here, return ENOSYS. 
SyscallNumber::Fork | SyscallNumber::Exec | SyscallNumber::Sigreturn => (-38_i64) as u64, // PAUSE and SIGSUSPEND also handled before dispatch @@ -465,6 +502,19 @@ fn dispatch_syscall( SyscallNumber::Readlink => result_to_u64(crate::syscall::fs::sys_readlink(arg1, arg2, arg3)), SyscallNumber::Mknod => result_to_u64(crate::syscall::fifo::sys_mknod(arg1, arg2 as u32, arg3)), + // *at variants (ARM64 Linux uses these instead of legacy syscalls) + SyscallNumber::Openat => result_to_u64(crate::syscall::fs::sys_openat(arg1 as i32, arg2, arg3 as u32, arg4 as u32)), + SyscallNumber::Faccessat => result_to_u64(crate::syscall::fs::sys_faccessat(arg1 as i32, arg2, arg3 as u32, arg4 as u32)), + SyscallNumber::Mkdirat => result_to_u64(crate::syscall::fs::sys_mkdirat(arg1 as i32, arg2, arg3 as u32)), + SyscallNumber::Mknodat => result_to_u64(crate::syscall::fs::sys_mknodat(arg1 as i32, arg2, arg3 as u32, arg4)), + SyscallNumber::Unlinkat => result_to_u64(crate::syscall::fs::sys_unlinkat(arg1 as i32, arg2, arg3 as i32)), + SyscallNumber::Symlinkat => result_to_u64(crate::syscall::fs::sys_symlinkat(arg1, arg2 as i32, arg3)), + SyscallNumber::Linkat => result_to_u64(crate::syscall::fs::sys_linkat(arg1 as i32, arg2, arg3 as i32, arg4, arg5 as i32)), + SyscallNumber::Renameat => result_to_u64(crate::syscall::fs::sys_renameat(arg1 as i32, arg2, arg3 as i32, arg4)), + SyscallNumber::Readlinkat => result_to_u64(crate::syscall::fs::sys_readlinkat(arg1 as i32, arg2, arg3, arg4)), + SyscallNumber::Dup3 => result_to_u64(crate::syscall::handlers::sys_dup2(arg1, arg2)), // dup3 with flags=0 is dup2 + SyscallNumber::Pselect6 => result_to_u64(crate::syscall::handlers::sys_select(arg1 as i32, arg2, arg3, arg4, arg5)), // simplified + // PTY syscalls SyscallNumber::PosixOpenpt => result_to_u64(crate::syscall::pty::sys_posix_openpt(arg1)), SyscallNumber::Grantpt => result_to_u64(crate::syscall::pty::sys_grantpt(arg1)), @@ -499,6 +549,18 @@ fn dispatch_syscall( // Display takeover 
SyscallNumber::TakeOverDisplay => result_to_u64(crate::syscall::handlers::sys_take_over_display()), SyscallNumber::GiveBackDisplay => result_to_u64(crate::syscall::handlers::sys_give_back_display()), + // Vectored I/O + SyscallNumber::Readv => result_to_u64(crate::syscall::iovec::sys_readv(arg1, arg2, arg3)), + SyscallNumber::Writev => result_to_u64(crate::syscall::iovec::sys_writev(arg1, arg2, arg3)), + // Stubs for musl libc compatibility + SyscallNumber::Mremap => (-(crate::syscall::errno::ENOMEM as i64)) as u64, + SyscallNumber::Madvise => 0, + SyscallNumber::Ppoll => result_to_u64(crate::syscall::handlers::sys_ppoll(arg1, arg2, arg3, arg4, arg5)), + SyscallNumber::SetRobustList => 0, + // arch_prctl is x86_64 only - return ENOSYS on ARM64 + SyscallNumber::ArchPrctl => (-(crate::syscall::errno::ENOSYS as i64)) as u64, + // Filesystem: newfstatat + SyscallNumber::Newfstatat => result_to_u64(crate::syscall::fs::sys_newfstatat(arg1 as i32, arg2, arg3, arg4 as u32)), // Testing/diagnostic syscalls SyscallNumber::CowStats => sys_cow_stats_aarch64(arg1), SyscallNumber::SimulateOom => sys_simulate_oom_aarch64(arg1), diff --git a/kernel/src/arch_impl/aarch64/timer_interrupt.rs b/kernel/src/arch_impl/aarch64/timer_interrupt.rs index c8ba5ede..fe8ab7da 100644 --- a/kernel/src/arch_impl/aarch64/timer_interrupt.rs +++ b/kernel/src/arch_impl/aarch64/timer_interrupt.rs @@ -482,22 +482,29 @@ fn poll_keyboard_to_stdin() { let keycode = event.code; let pressed = event.value != 0; - // Track shift key state + // Track modifier key state if input_mmio::is_shift(keycode) { SHIFT_PRESSED.store(pressed, core::sync::atomic::Ordering::Relaxed); continue; } - // Only process key presses (not releases) + // Only process key presses and repeats (not releases) if pressed { + // Generate VT100 escape sequences for special keys + // (F-keys, arrows, Home, End, Delete) + if let Some(seq) = input_mmio::keycode_to_escape_seq(keycode) { + for &b in seq { + if !crate::tty::push_char_nonblock(b) 
{ + crate::ipc::stdin::push_byte_from_irq(b); + } + } + continue; + } + let shift = SHIFT_PRESSED.load(core::sync::atomic::Ordering::Relaxed); if let Some(c) = input_mmio::keycode_to_char(keycode, shift) { // Route through TTY for echo and line discipline processing. // This is the non-blocking version safe for interrupt context. - // The TTY will: - // 1. Echo the character to the display - // 2. Process it through line discipline (handle backspace, Ctrl-C, etc.) - // 3. Add it to the TTY input buffer for userspace to read if !crate::tty::push_char_nonblock(c as u8) { // TTY busy - fall back to raw stdin buffer // (no echo, but at least input isn't lost) diff --git a/kernel/src/boot/test_list.rs b/kernel/src/boot/test_list.rs index 8e5718d0..6d128b3c 100644 --- a/kernel/src/boot/test_list.rs +++ b/kernel/src/boot/test_list.rs @@ -77,6 +77,8 @@ pub const TEST_BINARIES: &[&str] = &[ "ls_test", // Rust std library test (installed as hello_world.elf on ext2) "hello_world", + // musl libc C program (cross-compiled with musl libc for aarch64) + "hello_musl", // Fork / CoW tests "fork_memory_test", "fork_state_test", diff --git a/kernel/src/drivers/virtio/input_mmio.rs b/kernel/src/drivers/virtio/input_mmio.rs index bc4a1563..7853c214 100644 --- a/kernel/src/drivers/virtio/input_mmio.rs +++ b/kernel/src/drivers/virtio/input_mmio.rs @@ -907,7 +907,7 @@ pub fn keycode_to_char(code: u16, shift: bool) -> Option { /// Convert a Linux keycode to a VT100 escape sequence for special keys /// that require multi-byte output (arrow keys, Home, End, Delete, F-keys). 
-fn keycode_to_escape_seq(code: u16) -> Option<&'static [u8]> { +pub fn keycode_to_escape_seq(code: u16) -> Option<&'static [u8]> { match code { // Function keys (SS3 sequences for F1-F4, CSI sequences for F5+) 59 => Some(b"\x1bOP"), // F1 diff --git a/kernel/src/elf.rs b/kernel/src/elf.rs index 7d86d02c..3b0dff03 100644 --- a/kernel/src/elf.rs +++ b/kernel/src/elf.rs @@ -87,6 +87,12 @@ pub struct LoadedElf { pub stack_top: VirtAddr, /// End of loaded segments, page-aligned up (start of heap) pub segments_end: u64, + /// Virtual address of program headers (from PT_PHDR or load_base + phoff) + pub phdr_vaddr: u64, + /// Number of program headers + pub phnum: u16, + /// Size of each program header entry + pub phentsize: u16, } /// Load an ELF64 binary into memory @@ -162,6 +168,8 @@ pub fn load_elf_at_base(data: &[u8], base_offset: VirtAddr) -> Result = None; for i in 0..ph_count { let ph_start = ph_offset + i * ph_size; @@ -174,6 +182,11 @@ pub fn load_elf_at_base(data: &[u8], base_offset: VirtAddr) -> Result()]); let ph: &Elf64ProgramHeader = unsafe { &*(ph_bytes.as_ptr() as *const Elf64ProgramHeader) }; + // Check for PT_PHDR segment + if ph.p_type == SegmentType::Phdr as u32 { + phdr_vaddr = Some(ph.p_vaddr); + } + if ph.p_type == SegmentType::Load as u32 { load_segment(data, ph, base_offset)?; @@ -193,12 +206,19 @@ pub fn load_elf_at_base(data: &[u8], base_offset: VirtAddr) -> Result = None; // Load program segments for i in 0..header.phnum { @@ -371,6 +393,11 @@ pub fn load_elf_into_page_table( ph_bytes.copy_from_slice(&data[ph_start..ph_start + mem::size_of::()]); let ph: &Elf64ProgramHeader = unsafe { &*(ph_bytes.as_ptr() as *const Elf64ProgramHeader) }; + // Check for PT_PHDR segment + if ph.p_type == SegmentType::Phdr as u32 { + phdr_vaddr = Some(ph.p_vaddr); + } + if ph.p_type == SegmentType::Load as u32 { load_segment_into_page_table(data, ph, page_table)?; @@ -385,6 +412,10 @@ pub fn load_elf_into_page_table( // Align heap start to next page boundary 
(4KB) let heap_start = (max_segment_end + 0xfff) & !0xfff; + // If no PT_PHDR was found, compute from header phoff + // For position-dependent executables, this is the file offset which matches vaddr for first LOAD at 0 + let phdr_vaddr = phdr_vaddr.unwrap_or(header.phoff); + log::info!( "ELF loaded: segments end at {:#x}, heap will start at {:#x}", max_segment_end, @@ -395,6 +426,9 @@ pub fn load_elf_into_page_table( entry_point: VirtAddr::new(header.entry), stack_top: VirtAddr::zero(), // Stack will be allocated by spawn function segments_end: heap_start, + phdr_vaddr, + phnum: header.phnum, + phentsize: header.phentsize, }) } diff --git a/kernel/src/graphics/arm64_fb.rs b/kernel/src/graphics/arm64_fb.rs index 9c31139f..d0f92330 100644 --- a/kernel/src/graphics/arm64_fb.rs +++ b/kernel/src/graphics/arm64_fb.rs @@ -448,6 +448,21 @@ impl Canvas for ShellFrameBuffer { /// Global shell framebuffer instance (compatible with x86_64 logger.rs interface) pub static SHELL_FRAMEBUFFER: OnceCell> = OnceCell::uninit(); +/// Cached framebuffer dimensions, set once during init and never modified. +/// This allows sys_fbinfo to read dimensions without acquiring the framebuffer lock, +/// avoiding contention with BWM's flush operations which hold the lock for ~400μs +/// during full-screen pixel copies. +pub static FB_INFO_CACHE: OnceCell = OnceCell::uninit(); + +/// Immutable framebuffer info cached at initialization time. +pub struct FbInfoCache { + pub width: usize, + pub height: usize, + pub stride: usize, + pub bytes_per_pixel: usize, + pub is_bgr: bool, +} + /// Initialize the shell framebuffer /// /// Must be called after VirtIO GPU initialization. 
@@ -461,6 +476,15 @@ pub fn init_shell_framebuffer() -> Result<(), &'static str> { fb.height() ); + // Cache immutable dimensions for lock-free access by sys_fbinfo + let _ = FB_INFO_CACHE.try_init_once(|| FbInfoCache { + width: fb.width(), + height: fb.height(), + stride: fb.stride(), + bytes_per_pixel: fb.bytes_per_pixel(), + is_bgr: fb.is_bgr(), + }); + let _ = SHELL_FRAMEBUFFER.try_init_once(|| Mutex::new(fb)); Ok(()) } diff --git a/kernel/src/ipc/fd.rs b/kernel/src/ipc/fd.rs index 1b17efe5..f490ee64 100644 --- a/kernel/src/ipc/fd.rs +++ b/kernel/src/ipc/fd.rs @@ -307,6 +307,21 @@ impl FdTable { FdTable { fds } } + /// Take all file descriptor entries out of the table, leaving it empty. + /// + /// Returns a Vec of (fd_number, FileDescriptor) pairs for deferred cleanup. + /// Used by process exit to extract FD entries while holding PM lock, + /// then close them outside the lock to minimize lock hold time. + pub fn take_all(&mut self) -> alloc::vec::Vec<(usize, FileDescriptor)> { + let mut entries = alloc::vec::Vec::new(); + for fd in 0..MAX_FDS { + if let Some(entry) = self.fds[fd].take() { + entries.push((fd, entry)); + } + } + entries + } + /// Allocate a new file descriptor with the given kind /// Returns the fd number on success, or an error code pub fn alloc(&mut self, kind: FdKind) -> Result { diff --git a/kernel/src/memory/frame_allocator.rs b/kernel/src/memory/frame_allocator.rs index 0083f152..7ebf8f06 100644 --- a/kernel/src/memory/frame_allocator.rs +++ b/kernel/src/memory/frame_allocator.rs @@ -29,13 +29,16 @@ struct UsableRegion { end: u64, } -/// Stores extracted memory information +/// Stores extracted memory information (immutable after initialization) struct MemoryInfo { regions: [Option; MAX_REGIONS], region_count: usize, } -static MEMORY_INFO: Mutex> = Mutex::new(None); +/// Memory region info, initialized once during boot and never modified. 
+/// Using spin::Once instead of Mutex eliminates lock contention on the +/// frame allocation hot path - get() is a single atomic load after init. +static MEMORY_INFO: spin::Once = spin::Once::new(); static NEXT_FREE_FRAME: AtomicUsize = AtomicUsize::new(0); /// Free list for deallocated frames @@ -91,23 +94,9 @@ impl BootInfoFrameAllocator { Self } - /// Get the nth usable frame + /// Get the nth usable frame (lock-free after initialization) fn get_usable_frame(n: usize) -> Option { - // Check if we're in a problematic allocation - if n > 1500 && n < 1600 { - log::debug!("get_usable_frame: Allocating frame number {}", n); - } - - // Try to detect potential deadlock - let info = match MEMORY_INFO.try_lock() { - Some(guard) => guard, - None => { - log::error!("MEMORY_INFO lock is already held - potential deadlock!"); - // Force a panic with more info - panic!("Frame allocator deadlock detected during allocation #{}", n); - } - }; - let info = info.as_ref()?; + let info = MEMORY_INFO.get()?; let mut count = 0; for i in 0..info.region_count { @@ -124,13 +113,7 @@ impl BootInfoFrameAllocator { "Attempting to allocate frame below low memory floor: {:#x}", frame_addr ); - - // Log problematic frame allocations - if frame_addr == 0x62f000 { - log::warn!("Allocating problematic frame 0x62f000 (frame #{}, region {}, offset {})", - n, i, frame_offset); - } - + // Production safety: Never return frames below the floor if frame_addr < LOW_MEMORY_FLOOR { log::error!( @@ -211,7 +194,7 @@ pub fn init(memory_regions: &'static MemoryRegions) { ignored_memory += region.end - region.start; continue; } - + if region_count < MAX_REGIONS { // Adjust region start if it begins below the floor let adjusted_start = if region.start < LOW_MEMORY_FLOOR { @@ -223,7 +206,7 @@ pub fn init(memory_regions: &'static MemoryRegions) { } else { region.start }; - + regions[region_count] = Some(UsableRegion { start: adjusted_start, end: region.end, @@ -238,8 +221,8 @@ pub fn init(memory_regions: 
&'static MemoryRegions) { } } - // Store the extracted information - *MEMORY_INFO.lock() = Some(MemoryInfo { + // Store the extracted information (once, immutable) + MEMORY_INFO.call_once(|| MemoryInfo { regions, region_count, }); @@ -280,7 +263,7 @@ pub fn init_aarch64(start: u64, end: u64) { let total_memory = end - aligned_start; - *MEMORY_INFO.lock() = Some(MemoryInfo { + MEMORY_INFO.call_once(|| MemoryInfo { regions, region_count: 1, }); @@ -381,19 +364,15 @@ pub struct MemoryStats { /// Returns total usable memory, allocated frame count, and free list size. /// These can be used to compute total, used, and free memory. pub fn memory_stats() -> MemoryStats { - // Calculate total memory from MEMORY_INFO regions - let total_bytes = if let Some(info_guard) = MEMORY_INFO.try_lock() { - if let Some(ref info) = *info_guard { - let mut total = 0u64; - for i in 0..info.region_count { - if let Some(region) = info.regions[i] { - total += region.end - region.start; - } + // Calculate total memory from MEMORY_INFO regions (lock-free read) + let total_bytes = if let Some(info) = MEMORY_INFO.get() { + let mut total = 0u64; + for i in 0..info.region_count { + if let Some(region) = info.regions[i] { + total += region.end - region.start; } - total - } else { - 0 } + total } else { 0 }; diff --git a/kernel/src/process/manager.rs b/kernel/src/process/manager.rs index 5f487b11..4578e6aa 100644 --- a/kernel/src/process/manager.rs +++ b/kernel/src/process/manager.rs @@ -582,10 +582,18 @@ impl ProcessManager { return Err("Process page table not available for stack mapping"); } - // Set up argc/argv on the stack following Linux ABI + // Set up argc/argv/envp/auxv on the stack following Linux ABI // The stack is now mapped, so we can write to it via physical addresses let initial_sp = if let Some(ref page_table) = process.page_table { - self.setup_argv_on_stack(page_table, user_stack_top, argv)? 
+ self.setup_argv_on_stack( + page_table, + user_stack_top, + argv, + loaded_elf.phdr_vaddr, + loaded_elf.phnum, + loaded_elf.phentsize, + loaded_elf.entry_point, + )? } else { return Err("Process page table not available for argv setup"); }; @@ -993,6 +1001,11 @@ impl ProcessManager { self.processes.len() } + /// Iterate over all processes (for diagnostics) + pub fn iter_processes(&self) -> impl Iterator { + self.processes.iter().map(|(pid, p)| (*pid, p)) + } + /// Remove a process from the ready queue pub fn remove_from_ready_queue(&mut self, pid: ProcessId) -> bool { if let Some(index) = self.ready_queue.iter().position(|&p| p == pid) { @@ -2559,11 +2572,19 @@ impl ProcessManager { )?; } - // Set up argc/argv on the stack following Linux ABI + // Set up argc/argv/envp/auxv on the stack following Linux ABI // We need to write to the new stack pages that we just mapped // Since the new page table is not active yet, we need to translate addresses // and write via the physical frames - let initial_rsp = self.setup_argv_on_stack(&new_page_table, USER_STACK_TOP, argv)?; + let initial_rsp = self.setup_argv_on_stack( + &new_page_table, + USER_STACK_TOP, + argv, + loaded_elf.phdr_vaddr, + loaded_elf.phnum, + loaded_elf.phentsize, + loaded_elf.entry_point.as_u64(), + )?; log::info!( "exec_process_with_argv: argc/argv set up on stack, RSP={:#x}", @@ -2791,7 +2812,15 @@ impl ProcessManager { )?; } - let initial_rsp = self.setup_argv_on_stack(&new_page_table, user_stack_top, argv)?; + let initial_rsp = self.setup_argv_on_stack( + &new_page_table, + user_stack_top, + argv, + loaded_elf.phdr_vaddr, + loaded_elf.phnum, + loaded_elf.phentsize, + loaded_elf.entry_point, + )?; log::info!( "exec_process_with_argv [ARM64]: argc/argv set up on stack, SP_EL0={:#x}", @@ -3212,25 +3241,39 @@ impl ProcessManager { Ok(new_entry_point) } - /// Set up argc/argv on the stack for a new process + /// Set up argc/argv/envp/auxv on the stack for a new process /// - /// This function writes the 
argc/argv structure to the stack following the - /// Linux x86_64 ABI convention. The stack layout at _start is: + /// This function writes the full Linux ABI initial stack structure including + /// argc, argv pointers, envp (empty), and auxiliary vector entries needed by + /// musl libc. /// - /// High addresses: - /// argv strings (null-terminated, packed) - /// padding for 16-byte alignment - /// NULL (end of argv) + /// Stack layout (high to low addresses): + /// + /// 16 random bytes (for AT_RANDOM) + /// argv string data (null-terminated strings) + /// --- 8-byte alignment padding --- + /// AT_NULL (0, 0) // auxv terminator + /// AT_RANDOM (25, ptr_to_random) // pointer to 16 random bytes + /// AT_PAGESZ (6, 4096) // page size + /// AT_PHENT (4, phentsize) // size of program header entry + /// AT_PHNUM (5, phnum) // number of program headers + /// AT_PHDR (3, phdr_vaddr) // address of program headers in memory + /// AT_ENTRY (9, entry_point) // program entry point + /// NULL (envp terminator) // 8 bytes of 0 + /// NULL (argv terminator) // 8 bytes of 0 /// argv[n-1] pointer /// ... 
/// argv[0] pointer - /// argc <- RSP points here - /// Low addresses: + /// argc <- RSP points here (16-byte aligned) /// /// Parameters: /// - page_table: The process's page table (for translating virtual to physical addresses) /// - stack_top: The top of the stack (highest address) /// - argv: Array of argument strings (each must be null-terminated) + /// - phdr_vaddr: Virtual address of program headers in memory + /// - phnum: Number of program headers + /// - phentsize: Size of each program header entry + /// - entry_point: Program entry point address /// /// Returns: The initial RSP value (pointing to argc) #[allow(dead_code)] @@ -3239,16 +3282,52 @@ impl ProcessManager { page_table: &crate::memory::process_memory::ProcessPageTable, stack_top: u64, argv: &[&[u8]], + phdr_vaddr: u64, + phnum: u16, + phentsize: u16, + entry_point: u64, ) -> Result { let argc = argv.len(); // We need to access the stack memory directly via physical addresses // since the new page table isn't active yet - // Calculate total space needed for strings + // --- Phase 1: Write data at the top of the stack (strings + random bytes) --- + + // Start from the top of the stack and work downward + let mut cursor = stack_top; + + // Write 16 pseudo-random bytes for AT_RANDOM + // Use a simple mix of the stack address and fixed values since we don't + // have a strong PRNG requirement at boot time + cursor -= 16; + let random_addr = cursor; + let random_seed = stack_top.wrapping_mul(0x5851F42D4C957F2D).wrapping_add(0x14057B7EF767814F); + let random_bytes: [u8; 16] = [ + (random_seed >> 0) as u8, + (random_seed >> 8) as u8, + (random_seed >> 16) as u8, + (random_seed >> 24) as u8, + (random_seed >> 32) as u8, + (random_seed >> 40) as u8, + (random_seed >> 48) as u8, + (random_seed >> 56) as u8, + (random_seed.wrapping_mul(0x2545F4914F6CDD1D) >> 0) as u8, + (random_seed.wrapping_mul(0x2545F4914F6CDD1D) >> 8) as u8, + (random_seed.wrapping_mul(0x2545F4914F6CDD1D) >> 16) as u8, + 
(random_seed.wrapping_mul(0x2545F4914F6CDD1D) >> 24) as u8, + (random_seed.wrapping_mul(0x2545F4914F6CDD1D) >> 32) as u8, + (random_seed.wrapping_mul(0x2545F4914F6CDD1D) >> 40) as u8, + (random_seed.wrapping_mul(0x2545F4914F6CDD1D) >> 48) as u8, + (random_seed.wrapping_mul(0x2545F4914F6CDD1D) >> 56) as u8, + ]; + for (i, byte) in random_bytes.iter().enumerate() { + self.write_byte_to_stack(page_table, random_addr + i as u64, *byte)?; + } + + // Calculate total space needed for argv strings let mut total_string_space: usize = 0; for arg in argv.iter() { - // Each string + null terminator (if not already null-terminated) let len = arg.len(); if len > 0 && arg[len - 1] == 0 { total_string_space += len; @@ -3257,25 +3336,20 @@ impl ProcessManager { } } - // Start placing strings at the top of the stack and work down - let mut string_ptr = stack_top; - - // Reserve space for strings - string_ptr -= total_string_space as u64; + // Reserve space for strings below the random bytes + cursor -= total_string_space as u64; // Align down to 8 bytes for string area - string_ptr = string_ptr & !7; + cursor = cursor & !7; + let string_area_start = cursor; - // We'll collect the string addresses as we write them + // Write argv strings and collect their addresses let mut string_addresses: Vec = Vec::with_capacity(argc); - - // Write strings from the reserved area upward - let mut current_string_addr = string_ptr; + let mut current_string_addr = string_area_start; for arg in argv.iter() { string_addresses.push(current_string_addr); - // Write the string bytes for byte in arg.iter() { self.write_byte_to_stack(page_table, current_string_addr, *byte)?; current_string_addr += 1; @@ -3289,42 +3363,102 @@ impl ProcessManager { } } - // Now place the pointer array and argc below the strings - // Layout (from high to low): - // strings (already placed) - // NULL (8 bytes) - // argv[n-1] pointer (8 bytes) - // ... 
- // argv[0] pointer (8 bytes) - // argc (8 bytes) + // --- Phase 2: Build the pointer/value section below the strings --- - // Calculate space needed for pointers + argc - let pointers_space = (argc + 1) * 8 + 8; // argc pointers + NULL + argc value + // Auxiliary vector entries (each is two u64 values: type, value) + // AT_ENTRY, AT_PHDR, AT_PHNUM, AT_PHENT, AT_PAGESZ, AT_RANDOM, AT_NULL = 7 entries = 14 u64s + let auxv_count = 7; + let auxv_space = auxv_count * 2 * 8; // 7 entries * 2 u64s * 8 bytes - // Start of pointer area (below strings) - let mut ptr_area = string_ptr - pointers_space as u64; + // envp: just a NULL terminator (empty environment) = 1 u64 + let envp_space = 8; - // Align to 16 bytes (required by x86_64 ABI) - ptr_area = ptr_area & !15; + // argv: argc pointers + NULL terminator = (argc + 1) u64s + let argv_space = (argc + 1) * 8; - // Write argc at the bottom - let rsp = ptr_area; - self.write_u64_to_stack(page_table, rsp, argc as u64)?; + // argc: 1 u64 + let argc_space = 8; - // Write argv pointers - let argv_start = rsp + 8; - for (i, addr) in string_addresses.iter().enumerate() { - self.write_u64_to_stack(page_table, argv_start + (i * 8) as u64, *addr)?; - } + let total_ptr_space = argc_space + argv_space + envp_space + auxv_space; - // Write NULL terminator for argv array - self.write_u64_to_stack(page_table, argv_start + (argc * 8) as u64, 0)?; + // Position the pointer area below the string area + let mut ptr_area = string_area_start - total_ptr_space as u64; + + // Align to 16 bytes (required by x86_64/ARM64 ABI) + ptr_area = ptr_area & !15; + + // Write from the bottom up + let rsp = ptr_area; + let mut write_pos = rsp; + + // 1. Write argc + self.write_u64_to_stack(page_table, write_pos, argc as u64)?; + write_pos += 8; + + // 2. Write argv pointers + for addr in string_addresses.iter() { + self.write_u64_to_stack(page_table, write_pos, *addr)?; + write_pos += 8; + } + + // 3. 
Write argv NULL terminator + self.write_u64_to_stack(page_table, write_pos, 0)?; + write_pos += 8; + + // 4. Write envp NULL terminator (empty environment) + self.write_u64_to_stack(page_table, write_pos, 0)?; + write_pos += 8; + + // 5. Write auxiliary vector entries + // AT_ENTRY (9) - program entry point + self.write_u64_to_stack(page_table, write_pos, 9)?; // AT_ENTRY + write_pos += 8; + self.write_u64_to_stack(page_table, write_pos, entry_point)?; + write_pos += 8; + + // AT_PHDR (3) - address of program headers + self.write_u64_to_stack(page_table, write_pos, 3)?; // AT_PHDR + write_pos += 8; + self.write_u64_to_stack(page_table, write_pos, phdr_vaddr)?; + write_pos += 8; + + // AT_PHNUM (5) - number of program headers + self.write_u64_to_stack(page_table, write_pos, 5)?; // AT_PHNUM + write_pos += 8; + self.write_u64_to_stack(page_table, write_pos, phnum as u64)?; + write_pos += 8; + + // AT_PHENT (4) - size of each program header entry + self.write_u64_to_stack(page_table, write_pos, 4)?; // AT_PHENT + write_pos += 8; + self.write_u64_to_stack(page_table, write_pos, phentsize as u64)?; + write_pos += 8; + + // AT_PAGESZ (6) - page size + self.write_u64_to_stack(page_table, write_pos, 6)?; // AT_PAGESZ + write_pos += 8; + self.write_u64_to_stack(page_table, write_pos, 4096)?; + write_pos += 8; + + // AT_RANDOM (25) - pointer to 16 random bytes + self.write_u64_to_stack(page_table, write_pos, 25)?; // AT_RANDOM + write_pos += 8; + self.write_u64_to_stack(page_table, write_pos, random_addr)?; + write_pos += 8; + + // AT_NULL (0) - terminator + self.write_u64_to_stack(page_table, write_pos, 0)?; // AT_NULL + write_pos += 8; + self.write_u64_to_stack(page_table, write_pos, 0)?; log::debug!( - "setup_argv_on_stack: argc={}, RSP={:#x}, argv[0] at {:#x}", + "setup_argv_on_stack: argc={}, RSP={:#x}, argv[0] at {:#x}, auxv with phdr={:#x} phnum={} entry={:#x}", argc, rsp, - if !string_addresses.is_empty() { string_addresses[0] } else { 0 } + if 
!string_addresses.is_empty() { string_addresses[0] } else { 0 }, + phdr_vaddr, + phnum, + entry_point, ); Ok(rsp) diff --git a/kernel/src/process/process.rs b/kernel/src/process/process.rs index 37897cca..7f140f75 100644 --- a/kernel/src/process/process.rs +++ b/kernel/src/process/process.rs @@ -258,7 +258,19 @@ impl Process { /// Also cleans up Copy-on-Write frame references to avoid memory leaks. /// CRITICAL: Also marks the main thread as Terminated so the scheduler /// doesn't keep scheduling this thread after process termination. + /// + /// NOTE: This method does FD cleanup and CoW cleanup inline, which means + /// it acquires pipe locks, scheduler locks, and frame metadata locks. + /// For `handle_thread_exit`, use `terminate_minimal()` + deferred cleanup + /// to reduce PM lock hold time on ARM64 SMP. pub fn terminate(&mut self, exit_code: i32) { + // Guard against double-terminate: if the process is already terminated, + // skip all cleanup to prevent double-decrementing COW page refcounts + // (which would free pages still mapped by other processes). + if matches!(self.state, ProcessState::Terminated(_)) { + return; + } + // Close all file descriptors before setting state to Terminated // This ensures pipe counts are properly decremented so readers get EOF self.close_all_fds(); @@ -277,240 +289,119 @@ impl Process { // getting scheduled forever in an infinite loop. if let Some(ref mut thread) = self.main_thread { thread.set_terminated(); - log::info!( - "Process {} terminated (exit_code={}), marked thread {} as Terminated", - self.id.as_u64(), - exit_code, - thread.id() - ); } } + /// Minimal terminate: mark process and thread as terminated without cleanup. + /// + /// Used by `handle_thread_exit` to mark the process as terminated under PM lock, + /// then perform FD closure and CoW cleanup OUTSIDE the PM lock. 
This prevents + /// a system-wide hang on ARM64 SMP where logging, pipe wakeups, and scheduler + /// calls inside close_all_fds create lock ordering violations with the serial + /// output lock and framebuffer lock while all CPUs have interrupts disabled. + pub fn terminate_minimal(&mut self, exit_code: i32) { + if matches!(self.state, ProcessState::Terminated(_)) { + return; + } + self.state = ProcessState::Terminated(exit_code); + self.exit_code = Some(exit_code); + if let Some(ref mut thread) = self.main_thread { + thread.set_terminated(); + } + } + + /// Extract all file descriptor entries for deferred cleanup outside PM lock. + /// + /// Returns the FD entries without closing them — the caller is responsible + /// for pipe close_read/close_write, PTY refcounting, etc. + pub fn take_fd_entries(&mut self) -> alloc::vec::Vec<(usize, crate::ipc::fd::FileDescriptor)> { + self.fd_table.take_all() + } + /// Close all file descriptors in this process /// /// This properly decrements pipe reader/writer counts, ensuring that /// when all writers close, readers get EOF instead of EAGAIN. + /// + /// CRITICAL: No logging in this function — it runs under PM lock where + /// log calls create lock ordering violations (PM → SERIAL → framebuffer). 
#[cfg(target_arch = "x86_64")] fn close_all_fds(&mut self) { use crate::ipc::FdKind; - log::debug!("Process::close_all_fds() for process '{}'", self.name); - - // Close each fd, which will decrement pipe counts for fd in 0..crate::ipc::MAX_FDS { if let Ok(fd_entry) = self.fd_table.close(fd as i32) { match fd_entry.kind { - FdKind::PipeRead(buffer) => { - buffer.lock().close_read(); - log::debug!("Process::close_all_fds() - closed pipe read fd {}", fd); - } - FdKind::PipeWrite(buffer) => { - buffer.lock().close_write(); - log::debug!("Process::close_all_fds() - closed pipe write fd {}", fd); - } - FdKind::UdpSocket(_) => { - // Socket cleanup handled by UdpSocket::Drop when Arc refcount reaches 0 - log::debug!("Process::close_all_fds() - released UDP socket fd {}", fd); - } - FdKind::TcpSocket(_) => { - // Unbound TCP socket doesn't need cleanup - log::debug!("Process::close_all_fds() - released TCP socket fd {}", fd); - } - FdKind::TcpListener(port) => { - // Decrement ref count, remove only if it reaches 0 - crate::net::tcp::tcp_listener_ref_dec(port); - log::debug!("Process::close_all_fds() - released TCP listener fd {} on port {}", fd, port); - } - FdKind::TcpConnection(conn_id) => { - // Close the TCP connection - let _ = crate::net::tcp::tcp_close(&conn_id); - log::debug!("Process::close_all_fds() - closed TCP connection fd {}", fd); - } - FdKind::StdIo(_) => { - // StdIo doesn't need cleanup - } - FdKind::RegularFile(_) => { - // Regular file cleanup handled by Arc refcount - log::debug!("Process::close_all_fds() - released regular file fd {}", fd); - } - FdKind::Directory(_) => { - // Directory cleanup handled by Arc refcount - log::debug!("Process::close_all_fds() - released directory fd {}", fd); - } - FdKind::Device(_) => { - // Device files don't need cleanup - log::debug!("Process::close_all_fds() - released device fd {}", fd); - } - FdKind::DevfsDirectory { .. 
} => { - // Devfs directory doesn't need cleanup - log::debug!("Process::close_all_fds() - released devfs directory fd {}", fd); - } - FdKind::DevptsDirectory { .. } => { - // Devpts directory doesn't need cleanup - log::debug!("Process::close_all_fds() - released devpts directory fd {}", fd); - } + FdKind::PipeRead(buffer) => { buffer.lock().close_read(); } + FdKind::PipeWrite(buffer) => { buffer.lock().close_write(); } + FdKind::TcpListener(port) => { crate::net::tcp::tcp_listener_ref_dec(port); } + FdKind::TcpConnection(conn_id) => { let _ = crate::net::tcp::tcp_close(&conn_id); } FdKind::PtyMaster(pty_num) => { - // PTY master cleanup - decrement refcount, only release when all masters closed if let Some(pair) = crate::tty::pty::get(pty_num) { let old_count = pair.master_refcount.fetch_sub(1, core::sync::atomic::Ordering::SeqCst); - log::debug!("Process::close_all_fds() - PTY master fd {} (pty {}) refcount {} -> {}", - fd, pty_num, old_count, old_count - 1); if old_count == 1 { crate::tty::pty::release(pty_num); - log::debug!("Process::close_all_fds() - released PTY {} (last master closed)", pty_num); } } } FdKind::PtySlave(pty_num) => { - // Decrement slave refcount — master sees POLLHUP when last slave closes if let Some(pair) = crate::tty::pty::get(pty_num) { pair.slave_close(); } - log::debug!("Process::close_all_fds() - released PTY slave fd {}", fd); - } - FdKind::UnixStream(socket) => { - // Close Unix socket endpoint - socket.lock().close(); - log::debug!("Process::close_all_fds() - closed Unix stream socket fd {}", fd); - } - FdKind::UnixSocket(_) => { - // Unbound/bound Unix socket doesn't need cleanup - log::debug!("Process::close_all_fds() - released Unix socket fd {}", fd); - } - FdKind::UnixListener(_) => { - // Unix listener socket cleanup handled by Arc refcount - log::debug!("Process::close_all_fds() - released Unix listener fd {}", fd); } + FdKind::UnixStream(socket) => { socket.lock().close(); } FdKind::FifoRead(path, buffer) => { - // Close 
FIFO read end crate::ipc::fifo::close_fifo_read(&path); buffer.lock().close_read(); - log::debug!("Process::close_all_fds() - closed FIFO read fd {} ({})", fd, path); } FdKind::FifoWrite(path, buffer) => { - // Close FIFO write end crate::ipc::fifo::close_fifo_write(&path); buffer.lock().close_write(); - log::debug!("Process::close_all_fds() - closed FIFO write fd {} ({})", fd, path); - } - FdKind::ProcfsFile { .. } => { - // Procfs files are purely in-memory, nothing to clean up - } - FdKind::ProcfsDirectory { .. } => { - // Procfs directory doesn't need cleanup } + _ => {} // StdIo, RegularFile, Directory, Device, etc. — no action needed } } } } /// Close all file descriptors in this process (ARM64) + /// + /// CRITICAL: No logging in this function — it runs under PM lock where + /// log calls create lock ordering violations (PM → SERIAL → framebuffer). #[cfg(not(target_arch = "x86_64"))] fn close_all_fds(&mut self) { use crate::ipc::FdKind; - log::debug!("Process::close_all_fds() for process '{}'", self.name); - - // Close each fd, which will decrement pipe counts for fd in 0..crate::ipc::MAX_FDS { if let Ok(fd_entry) = self.fd_table.close(fd as i32) { match fd_entry.kind { - FdKind::PipeRead(buffer) => { - buffer.lock().close_read(); - log::debug!("Process::close_all_fds() - closed pipe read fd {}", fd); - } - FdKind::PipeWrite(buffer) => { - buffer.lock().close_write(); - log::debug!("Process::close_all_fds() - closed pipe write fd {}", fd); - } - FdKind::StdIo(_) => { - // StdIo doesn't need cleanup - } - FdKind::UdpSocket(_) => { - // UDP socket cleanup handled by Drop - log::debug!("Process::close_all_fds() - closed UDP socket fd {}", fd); - } - FdKind::UnixStream(_) => { - // Unix stream cleanup handled by Drop - log::debug!("Process::close_all_fds() - closed Unix stream fd {}", fd); - } - FdKind::UnixSocket(_) => { - // Unix socket cleanup handled by Drop - log::debug!("Process::close_all_fds() - closed Unix socket fd {}", fd); - } - 
FdKind::UnixListener(_) => { - // Unix listener cleanup handled by Drop - log::debug!("Process::close_all_fds() - closed Unix listener fd {}", fd); - } + FdKind::PipeRead(buffer) => { buffer.lock().close_read(); } + FdKind::PipeWrite(buffer) => { buffer.lock().close_write(); } + FdKind::TcpListener(port) => { crate::net::tcp::tcp_listener_ref_dec(port); } + FdKind::TcpConnection(conn_id) => { let _ = crate::net::tcp::tcp_close(&conn_id); } FdKind::PtyMaster(pty_num) => { - // PTY master cleanup - decrement refcount, only release when all masters closed if let Some(pair) = crate::tty::pty::get(pty_num) { let old_count = pair.master_refcount.fetch_sub(1, core::sync::atomic::Ordering::SeqCst); if old_count == 1 { crate::tty::pty::release(pty_num); } } - log::debug!("Process::close_all_fds() - closed PTY master fd {}", fd); } FdKind::PtySlave(pty_num) => { - // Decrement slave refcount — master sees POLLHUP when last slave closes if let Some(pair) = crate::tty::pty::get(pty_num) { pair.slave_close(); } - log::debug!("Process::close_all_fds() - closed PTY slave fd {}", fd); - } - FdKind::RegularFile(_) => { - // Regular file cleanup handled by Arc refcount - log::debug!("Process::close_all_fds() - released regular file fd {}", fd); - } - FdKind::Directory(_) => { - // Directory cleanup handled by Arc refcount - log::debug!("Process::close_all_fds() - released directory fd {}", fd); - } - FdKind::Device(_) => { - // Device files don't need cleanup - log::debug!("Process::close_all_fds() - released device fd {}", fd); - } - FdKind::DevfsDirectory { .. } => { - // Devfs directory doesn't need cleanup - log::debug!("Process::close_all_fds() - released devfs directory fd {}", fd); - } - FdKind::DevptsDirectory { .. 
} => { - // Devpts directory doesn't need cleanup - log::debug!("Process::close_all_fds() - released devpts directory fd {}", fd); } + FdKind::UnixStream(socket) => { socket.lock().close(); } FdKind::FifoRead(path, buffer) => { - // Close FIFO read end crate::ipc::fifo::close_fifo_read(&path); buffer.lock().close_read(); - log::debug!("Process::close_all_fds() - closed FIFO read fd {} ({})", fd, path); } FdKind::FifoWrite(path, buffer) => { - // Close FIFO write end crate::ipc::fifo::close_fifo_write(&path); buffer.lock().close_write(); - log::debug!("Process::close_all_fds() - closed FIFO write fd {} ({})", fd, path); - } - FdKind::TcpSocket(_) => { - // Unbound TCP socket doesn't need cleanup - log::debug!("Process::close_all_fds() - closed TCP socket fd {}", fd); - } - FdKind::TcpListener(port) => { - // Decrement ref count, remove only if it reaches 0 - crate::net::tcp::tcp_listener_ref_dec(port); - log::debug!("Process::close_all_fds() - released TCP listener fd {} port {}", fd, port); - } - FdKind::TcpConnection(conn_id) => { - // Close TCP connection - let _ = crate::net::tcp::tcp_close(&conn_id); - log::debug!("Process::close_all_fds() - closed TCP connection fd {}", fd); - } - FdKind::ProcfsFile { .. } => { - // Procfs files are purely in-memory, nothing to clean up - } - FdKind::ProcfsDirectory { .. } => { - // Procfs directory doesn't need cleanup } + _ => {} // StdIo, RegularFile, Directory, Device, etc. — no action needed } } } @@ -522,7 +413,7 @@ impl Process { /// reference counts. Frames that are no longer shared (refcount reaches 0) /// are returned to the frame allocator for reuse. 
#[cfg(target_arch = "x86_64")] - fn cleanup_cow_frames(&mut self) { + pub(crate) fn cleanup_cow_frames(&mut self) { use crate::memory::frame_allocator::deallocate_frame; use crate::memory::frame_metadata::frame_decref; use x86_64::structures::paging::{PageTableFlags, PhysFrame}; @@ -530,18 +421,9 @@ impl Process { // Get the page table for this process let page_table = match self.page_table.as_ref() { Some(pt) => pt, - None => { - log::debug!( - "Process {}: No page table to clean up", - self.id.as_u64() - ); - return; - } + None => return, }; - let mut freed_count = 0; - let mut shared_count = 0; - // Walk all user pages and decrement refcounts let _ = page_table.walk_mapped_pages(|_virt_addr, phys_addr, flags| { // Only process user-accessible pages @@ -558,20 +440,8 @@ impl Process { // Returns false if still shared (refcount > 0 after decrement). if frame_decref(frame) { deallocate_frame(frame); - freed_count += 1; - } else { - shared_count += 1; } }); - - if freed_count > 0 || shared_count > 0 { - log::debug!( - "Process {}: CoW cleanup - freed {} frames, {} still shared", - self.id.as_u64(), - freed_count, - shared_count - ); - } } /// Clean up Copy-on-Write frame references when process exits (ARM64) @@ -579,8 +449,10 @@ impl Process { /// Walks all user pages in the process's page table and decrements their /// reference counts. Frames that are no longer shared (refcount reaches 0) /// are returned to the frame allocator for reuse. + /// + /// CRITICAL: No logging — may run under PM lock. 
#[cfg(not(target_arch = "x86_64"))] - fn cleanup_cow_frames(&mut self) { + pub(crate) fn cleanup_cow_frames(&mut self) { use crate::memory::frame_allocator::deallocate_frame; use crate::memory::frame_metadata::frame_decref; use crate::memory::arch_stub::{PageTableFlags, PhysFrame}; @@ -588,18 +460,9 @@ impl Process { // Get the page table for this process let page_table = match self.page_table.as_ref() { Some(pt) => pt, - None => { - log::debug!( - "Process {}: No page table to clean up", - self.id.as_u64() - ); - return; - } + None => return, }; - let mut freed_count = 0; - let mut shared_count = 0; - // Walk all user pages and decrement refcounts let _ = page_table.walk_mapped_pages(|_virt_addr, phys_addr, flags| { // Only process user-accessible pages @@ -609,27 +472,10 @@ impl Process { let frame = PhysFrame::containing_address(phys_addr); - // Decrement reference count. - // Returns true if the frame should be freed: - // - Tracked frame whose refcount reached 0 (was shared, now sole owner exiting) - // - Untracked frame (private to this process, never shared via CoW) - // Returns false if still shared (refcount > 0 after decrement). if frame_decref(frame) { deallocate_frame(frame); - freed_count += 1; - } else { - shared_count += 1; } }); - - if freed_count > 0 || shared_count > 0 { - log::debug!( - "Process {}: CoW cleanup - freed {} frames, {} still shared", - self.id.as_u64(), - freed_count, - shared_count - ); - } } /// Drain and clean up any pending old page tables from previous exec() calls. @@ -638,16 +484,8 @@ impl Process { /// page table (e.g., at the start of the next exec, or during process exit). /// Each old page table has its user-space frames freed via `cleanup_for_exec()`. pub fn drain_old_page_tables(&mut self) { - if !self.pending_old_page_tables.is_empty() { - let count = self.pending_old_page_tables.len(); - for old_pt in self.pending_old_page_tables.drain(..) 
{ - old_pt.cleanup_for_exec(); - } - log::debug!( - "Process {}: drained {} pending old page table(s)", - self.id.as_u64(), - count - ); + for old_pt in self.pending_old_page_tables.drain(..) { + old_pt.cleanup_for_exec(); } } diff --git a/kernel/src/syscall/dispatcher.rs b/kernel/src/syscall/dispatcher.rs index 77b0bee2..854950d9 100644 --- a/kernel/src/syscall/dispatcher.rs +++ b/kernel/src/syscall/dispatcher.rs @@ -31,7 +31,6 @@ pub fn dispatch_syscall( SyscallNumber::Write => handlers::sys_write(arg1, arg2, arg3), SyscallNumber::Read => handlers::sys_read(arg1, arg2, arg3), SyscallNumber::Yield => handlers::sys_yield(), - SyscallNumber::GetTime => handlers::sys_get_time(), SyscallNumber::Fork => handlers::sys_fork(), SyscallNumber::Exec => handlers::sys_exec(arg1, arg2), SyscallNumber::GetPid => handlers::sys_getpid(), @@ -107,6 +106,18 @@ pub fn dispatch_syscall( SyscallNumber::Symlink => super::fs::sys_symlink(arg1, arg2), SyscallNumber::Readlink => super::fs::sys_readlink(arg1, arg2, arg3), SyscallNumber::Mknod => super::fifo::sys_mknod(arg1, arg2 as u32, arg3), + // *at variants (Linux ARM64 uses these, x86_64 also supports them) + SyscallNumber::Openat => super::fs::sys_openat(arg1 as i32, arg2, arg3 as u32, arg4 as u32), + SyscallNumber::Faccessat => super::fs::sys_faccessat(arg1 as i32, arg2, arg3 as u32, arg4 as u32), + SyscallNumber::Mkdirat => super::fs::sys_mkdirat(arg1 as i32, arg2, arg3 as u32), + SyscallNumber::Mknodat => super::fs::sys_mknodat(arg1 as i32, arg2, arg3 as u32, arg4), + SyscallNumber::Unlinkat => super::fs::sys_unlinkat(arg1 as i32, arg2, arg3 as i32), + SyscallNumber::Symlinkat => super::fs::sys_symlinkat(arg1, arg2 as i32, arg3), + SyscallNumber::Linkat => super::fs::sys_linkat(arg1 as i32, arg2, arg3 as i32, arg4, arg5 as i32), + SyscallNumber::Renameat => super::fs::sys_renameat(arg1 as i32, arg2, arg3 as i32, arg4), + SyscallNumber::Readlinkat => super::fs::sys_readlinkat(arg1 as i32, arg2, arg3, arg4), + SyscallNumber::Dup3 => 
handlers::sys_dup2(arg1, arg2), // dup3 with flags=0 is dup2 + SyscallNumber::Pselect6 => handlers::sys_select(arg1 as i32, arg2, arg3, arg4, arg5), // simplified // PTY syscalls SyscallNumber::PosixOpenpt => super::pty::sys_posix_openpt(arg1), SyscallNumber::Grantpt => super::pty::sys_grantpt(arg1), @@ -126,6 +137,39 @@ pub fn dispatch_syscall( // Display takeover (Breenix-specific) SyscallNumber::TakeOverDisplay => super::handlers::sys_take_over_display(), SyscallNumber::GiveBackDisplay => super::handlers::sys_give_back_display(), + // Vectored I/O + SyscallNumber::Readv => super::iovec::sys_readv(arg1, arg2, arg3), + SyscallNumber::Writev => super::iovec::sys_writev(arg1, arg2, arg3), + // Stubs for musl libc compatibility + SyscallNumber::Mremap => SyscallResult::Err(super::errno::ENOMEM as u64), + SyscallNumber::Madvise => SyscallResult::Ok(0), + SyscallNumber::Ppoll => super::handlers::sys_ppoll(arg1, arg2, arg3, arg4, arg5), + SyscallNumber::SetRobustList => SyscallResult::Ok(0), + // arch_prctl (x86_64 only) + SyscallNumber::ArchPrctl => { + const ARCH_SET_FS: u64 = 0x1002; + const ARCH_GET_FS: u64 = 0x1003; + match arg1 { + ARCH_SET_FS => { + x86_64::registers::model_specific::FsBase::write( + x86_64::VirtAddr::new(arg2), + ); + SyscallResult::Ok(0) + } + ARCH_GET_FS => { + let fs_base = x86_64::registers::model_specific::FsBase::read().as_u64(); + match super::userptr::copy_to_user(arg2 as *mut u64, &fs_base) { + Ok(()) => SyscallResult::Ok(0), + Err(e) => SyscallResult::Err(e), + } + } + _ => SyscallResult::Err(super::errno::EINVAL as u64), + } + } + // Filesystem: newfstatat + SyscallNumber::Newfstatat => super::fs::sys_newfstatat(arg1 as i32, arg2, arg3, arg4 as u32), + // GetTime is not mapped on x86_64 (kept for ARM64 compat) + SyscallNumber::GetTime => SyscallResult::Err(38), // ENOSYS // Testing/diagnostic syscalls (Breenix-specific) SyscallNumber::CowStats => super::handlers::sys_cow_stats(arg1), SyscallNumber::SimulateOom => 
super::handlers::sys_simulate_oom(arg1), diff --git a/kernel/src/syscall/fs.rs b/kernel/src/syscall/fs.rs index ca3c9823..4f71dbab 100644 --- a/kernel/src/syscall/fs.rs +++ b/kernel/src/syscall/fs.rs @@ -3269,3 +3269,197 @@ fn handle_fifo_open(path: &str, flags: u32) -> SyscallResult { } } } + +/// newfstatat(dirfd, pathname, statbuf, flags) - Get file status by path +/// +/// Linux syscall 262. Supports AT_FDCWD (-100) as dirfd to stat relative +/// to the current working directory. Required by musl libc. +pub fn sys_newfstatat(dirfd: i32, pathname: u64, statbuf: u64, _flags: u32) -> SyscallResult { + use super::errno::{EFAULT, ENOENT}; + use super::userptr::copy_cstr_from_user; + use crate::fs::ext2; + + const AT_FDCWD: i32 = -100; + + if statbuf == 0 { + return SyscallResult::Err(EFAULT as u64); + } + if pathname == 0 { + return SyscallResult::Err(EFAULT as u64); + } + + // Read pathname from userspace + let path = match copy_cstr_from_user(pathname) { + Ok(s) => s, + Err(e) => return SyscallResult::Err(e as u64), + }; + + // We only support AT_FDCWD for now + if dirfd != AT_FDCWD && !path.starts_with('/') { + // Relative paths with non-AT_FDCWD dirfd not yet supported + return SyscallResult::Err(super::errno::ENOSYS as u64); + } + + // Resolve the path to a full path (handle CWD for relative paths) + let full_path = if path.starts_with('/') { + path.clone() + } else { + // Get CWD from current process + let cwd = get_current_cwd().unwrap_or_else(|| alloc::string::String::from("/")); + if cwd.ends_with('/') { + alloc::format!("{}{}", cwd, path) + } else { + alloc::format!("{}/{}", cwd, path) + } + }; + + // Determine which filesystem to use + let is_home = ext2::is_home_path(&full_path); + let fs_path = if is_home { ext2::strip_home_prefix(&full_path) } else { &full_path }; + + // Look up inode by path + let (inode_num, mount_id) = if is_home { + let fs_guard = ext2::home_fs_read(); + let fs = match fs_guard.as_ref() { + Some(f) => f, + None => return 
SyscallResult::Err(ENOENT as u64), + }; + let mid = fs.mount_id; + match fs.resolve_path(fs_path) { + Ok(inum) => (inum as u64, mid), + Err(_) => return SyscallResult::Err(ENOENT as u64), + } + } else { + let fs_guard = ext2::root_fs_read(); + let fs = match fs_guard.as_ref() { + Some(f) => f, + None => return SyscallResult::Err(ENOENT as u64), + }; + let mid = fs.mount_id; + match fs.resolve_path(fs_path) { + Ok(inum) => (inum as u64, mid), + Err(_) => return SyscallResult::Err(ENOENT as u64), + } + }; + + // Build stat from inode + let mut stat = Stat::zeroed(); + stat.st_dev = mount_id as u64; + stat.st_ino = inode_num; + stat.st_blksize = 4096; + stat.st_nlink = 1; + stat.st_mode = S_IFREG | 0o644; // Default + + if let Some(inode_stat) = load_ext2_inode_stat_for_mount(inode_num, mount_id) { + stat.st_mode = inode_stat.mode; + stat.st_uid = inode_stat.uid; + stat.st_gid = inode_stat.gid; + stat.st_size = inode_stat.size; + stat.st_nlink = inode_stat.nlink; + stat.st_atime = inode_stat.atime; + stat.st_mtime = inode_stat.mtime; + stat.st_ctime = inode_stat.ctime; + stat.st_blocks = inode_stat.blocks; + } + + // Copy stat to userspace using raw pointer write + // (Stat doesn't implement Copy, so we can't use copy_to_user) + unsafe { + let user_stat = statbuf as *mut Stat; + core::ptr::write(user_stat, stat); + } + + SyscallResult::Ok(0) +} + +// ============================================================================= +// *at syscall variants (Linux ARM64 uses these instead of legacy syscalls) +// ============================================================================= +// +// ARM64 Linux has no open, mkdir, rmdir, link, unlink, symlink, readlink, +// mknod, rename, access. Instead it has *at variants that take a dirfd. +// These wrappers validate AT_FDCWD and delegate to the existing implementations. 
+ +/// AT_FDCWD: Use current working directory for relative paths +const AT_FDCWD: i32 = -100; +/// AT_REMOVEDIR flag for unlinkat (behave like rmdir) +const AT_REMOVEDIR: i32 = 0x200; + +/// openat(dirfd, pathname, flags, mode) - replacement for open +pub fn sys_openat(dirfd: i32, pathname: u64, flags: u32, mode: u32) -> SyscallResult { + if dirfd != AT_FDCWD { + return SyscallResult::Err(super::errno::ENOSYS as u64); + } + sys_open(pathname, flags, mode) +} + +/// faccessat(dirfd, pathname, mode, flags) - replacement for access +pub fn sys_faccessat(dirfd: i32, pathname: u64, mode: u32, _flags: u32) -> SyscallResult { + if dirfd != AT_FDCWD { + return SyscallResult::Err(super::errno::ENOSYS as u64); + } + sys_access(pathname, mode) +} + +/// mkdirat(dirfd, pathname, mode) - replacement for mkdir +pub fn sys_mkdirat(dirfd: i32, pathname: u64, mode: u32) -> SyscallResult { + if dirfd != AT_FDCWD { + return SyscallResult::Err(super::errno::ENOSYS as u64); + } + sys_mkdir(pathname, mode) +} + +/// mknodat(dirfd, pathname, mode, dev) - replacement for mknod +pub fn sys_mknodat(dirfd: i32, pathname: u64, mode: u32, dev: u64) -> SyscallResult { + if dirfd != AT_FDCWD { + return SyscallResult::Err(super::errno::ENOSYS as u64); + } + super::fifo::sys_mknod(pathname, mode, dev) +} + +/// unlinkat(dirfd, pathname, flags) - replacement for unlink and rmdir +/// +/// If flags contains AT_REMOVEDIR, behaves like rmdir. +/// Otherwise behaves like unlink. 
+pub fn sys_unlinkat(dirfd: i32, pathname: u64, flags: i32) -> SyscallResult {
+    match dirfd {
+        AT_FDCWD => {
+            // AT_REMOVEDIR picks the directory-removal path.
+            let remove_dir = (flags & AT_REMOVEDIR) != 0;
+            if remove_dir {
+                sys_rmdir(pathname)
+            } else {
+                sys_unlink(pathname)
+            }
+        }
+        _ => SyscallResult::Err(super::errno::ENOSYS as u64),
+    }
+}
+
+/// symlinkat(target, newdirfd, linkpath) - dirfd-taking form of symlink
+///
+/// Forwards to `sys_symlink` when `newdirfd` is `AT_FDCWD`; every other
+/// `newdirfd` is answered with ENOSYS.
+pub fn sys_symlinkat(target: u64, newdirfd: i32, linkpath: u64) -> SyscallResult {
+    match newdirfd {
+        AT_FDCWD => sys_symlink(target, linkpath),
+        _ => SyscallResult::Err(super::errno::ENOSYS as u64),
+    }
+}
+
+/// linkat(olddirfd, oldpath, newdirfd, newpath, flags) - dirfd-taking form of link
+///
+/// `_flags` is accepted but not consulted. Both directory descriptors must be
+/// `AT_FDCWD`; otherwise the call is answered with ENOSYS.
+pub fn sys_linkat(olddirfd: i32, oldpath: u64, newdirfd: i32, newpath: u64, _flags: i32) -> SyscallResult {
+    match (olddirfd, newdirfd) {
+        (AT_FDCWD, AT_FDCWD) => sys_link(oldpath, newpath),
+        _ => SyscallResult::Err(super::errno::ENOSYS as u64),
+    }
+}
+
+/// renameat(olddirfd, oldpath, newdirfd, newpath) - dirfd-taking form of rename
+///
+/// Both directory descriptors must be `AT_FDCWD`; otherwise the call is
+/// answered with ENOSYS.
+pub fn sys_renameat(olddirfd: i32, oldpath: u64, newdirfd: i32, newpath: u64) -> SyscallResult {
+    match (olddirfd, newdirfd) {
+        (AT_FDCWD, AT_FDCWD) => sys_rename(oldpath, newpath),
+        _ => SyscallResult::Err(super::errno::ENOSYS as u64),
+    }
+}
+
+/// readlinkat(dirfd, pathname, buf, bufsiz) - dirfd-taking form of readlink
+///
+/// Forwards to `sys_readlink` when `dirfd` is `AT_FDCWD`; every other
+/// `dirfd` is answered with ENOSYS.
+pub fn sys_readlinkat(dirfd: i32, pathname: u64, buf: u64, bufsiz: u64) -> SyscallResult {
+    match dirfd {
+        AT_FDCWD => sys_readlink(pathname, buf, bufsiz),
+        _ => SyscallResult::Err(super::errno::ENOSYS as u64),
+    }
+}
diff --git a/kernel/src/syscall/graphics.rs b/kernel/src/syscall/graphics.rs index c0e499bf..3ca7cdda 100644 --- a/kernel/src/syscall/graphics.rs +++ b/kernel/src/syscall/graphics.rs @@ -69,47 +69,65 @@ pub fn sys_fbinfo(info_ptr: u64) -> SyscallResult { return SyscallResult::Err(super::ErrorCode::Fault as u64); } - // Get framebuffer info from the shell framebuffer - let fb = match SHELL_FRAMEBUFFER.get() { - Some(fb) => fb, - None => { - log::warn!("sys_fbinfo: No framebuffer available"); -
return SyscallResult::Err(super::ErrorCode::InvalidArgument as u64); + // On ARM64, use the lock-free FbInfoCache to avoid contention with BWM's + // fb_flush, which holds SHELL_FRAMEBUFFER for ~400μs during full-screen + // pixel copies. Framebuffer dimensions are immutable after init. + #[cfg(target_arch = "aarch64")] + let info = { + let cache = match crate::graphics::arm64_fb::FB_INFO_CACHE.get() { + Some(c) => c, + None => { + log::warn!("sys_fbinfo: No framebuffer available"); + return SyscallResult::Err(super::ErrorCode::InvalidArgument as u64); + } + }; + FbInfo { + width: cache.width as u64, + height: cache.height as u64, + stride: cache.stride as u64, + bytes_per_pixel: cache.bytes_per_pixel as u64, + pixel_format: if cache.is_bgr { 1 } else { 0 }, } }; - // Use try_lock with bounded spin. On ARM64, syscalls run with interrupts - // disabled (DAIF=1111), so a blocking lock() would spin forever if the - // holder was preempted or terminated while holding the lock. Use a generous - // spin count since this is a one-time startup call. - let fb_guard = { - let mut guard = None; - for _ in 0..65536 { - if let Some(g) = fb.try_lock() { - guard = Some(g); - break; - } - core::hint::spin_loop(); - } - match guard { - Some(g) => g, + // On x86_64, acquire the framebuffer lock to read dimensions. + // Use try_lock with bounded spin since this is a one-time startup call. 
+ #[cfg(not(target_arch = "aarch64"))] + let info = { + let fb = match SHELL_FRAMEBUFFER.get() { + Some(fb) => fb, None => { - log::warn!("sys_fbinfo: framebuffer lock busy after 65536 spins"); - return SyscallResult::Err(super::ErrorCode::Busy as u64); + log::warn!("sys_fbinfo: No framebuffer available"); + return SyscallResult::Err(super::ErrorCode::InvalidArgument as u64); + } + }; + let fb_guard = { + let mut guard = None; + for _ in 0..65536 { + if let Some(g) = fb.try_lock() { + guard = Some(g); + break; + } + core::hint::spin_loop(); + } + match guard { + Some(g) => g, + None => { + log::warn!("sys_fbinfo: framebuffer lock busy after 65536 spins"); + return SyscallResult::Err(super::ErrorCode::Busy as u64); + } } + }; + use crate::graphics::primitives::Canvas; + FbInfo { + width: fb_guard.width() as u64, + height: fb_guard.height() as u64, + stride: fb_guard.stride() as u64, + bytes_per_pixel: fb_guard.bytes_per_pixel() as u64, + pixel_format: if fb_guard.is_bgr() { 1 } else { 0 }, } }; - // Get info through Canvas trait methods - use crate::graphics::primitives::Canvas; - let info = FbInfo { - width: fb_guard.width() as u64, - height: fb_guard.height() as u64, - stride: fb_guard.stride() as u64, - bytes_per_pixel: fb_guard.bytes_per_pixel() as u64, - pixel_format: if fb_guard.is_bgr() { 1 } else { 0 }, - }; - // Copy to userspace (pointer already validated above) unsafe { let info_out = info_ptr as *mut FbInfo; @@ -655,14 +673,34 @@ pub fn sys_fbmmap() -> SyscallResult { } }; - // Get framebuffer dimensions (acquire and release FB lock quickly) + // Get framebuffer dimensions. // The display owner (BWM) gets the right pane. All other processes get the left pane. + // + // On ARM64, use the lock-free FbInfoCache to avoid contention with BWM's + // fb_flush, which holds SHELL_FRAMEBUFFER for ~400μs during full-screen + // pixel copies. Dimensions are immutable after init. 
+ #[cfg(target_arch = "aarch64")] + let (pane_width, x_offset, height, bpp) = { + let cache = match crate::graphics::arm64_fb::FB_INFO_CACHE.get() { + Some(c) => c, + None => return SyscallResult::Err(super::ErrorCode::InvalidArgument as u64), + }; + if caller_owns_display { + let divider_width = 4; + let right_x = cache.width / 2 + divider_width; + let right_width = cache.width.saturating_sub(right_x); + (right_width, right_x, cache.height, cache.bytes_per_pixel) + } else { + (cache.width / 2, 0, cache.height, cache.bytes_per_pixel) + } + }; + + #[cfg(not(target_arch = "aarch64"))] let (pane_width, x_offset, height, bpp) = { let fb = match SHELL_FRAMEBUFFER.get() { Some(fb) => fb, None => return SyscallResult::Err(super::ErrorCode::InvalidArgument as u64), }; - // Use try_lock with bounded spin (same rationale as sys_fbinfo). let fb_guard = { let mut guard = None; for _ in 0..65536 { @@ -681,13 +719,11 @@ pub fn sys_fbmmap() -> SyscallResult { } }; if caller_owns_display { - // BWM mode: right half for window manager (after divider) let divider_width = 4; let right_x = fb_guard.width() / 2 + divider_width; let right_width = fb_guard.width().saturating_sub(right_x); (right_width, right_x, fb_guard.height(), fb_guard.bytes_per_pixel()) } else { - // Normal mode: left half for graphics demos (fb_guard.width() / 2, 0, fb_guard.height(), fb_guard.bytes_per_pixel()) } }; diff --git a/kernel/src/syscall/handler.rs b/kernel/src/syscall/handler.rs index 6201f8cf..1214ae3d 100644 --- a/kernel/src/syscall/handler.rs +++ b/kernel/src/syscall/handler.rs @@ -219,7 +219,6 @@ pub extern "C" fn rust_syscall_handler(frame: &mut SyscallFrame) { Some(SyscallNumber::Write) => super::handlers::sys_write(args.0, args.1, args.2), Some(SyscallNumber::Read) => super::handlers::sys_read(args.0, args.1, args.2), Some(SyscallNumber::Yield) => super::handlers::sys_yield(), - Some(SyscallNumber::GetTime) => super::handlers::sys_get_time(), Some(SyscallNumber::Fork) => 
super::handlers::sys_fork_with_frame(frame), Some(SyscallNumber::Mmap) => { let addr = args.0; @@ -373,6 +372,18 @@ pub extern "C" fn rust_syscall_handler(frame: &mut SyscallFrame) { Some(SyscallNumber::Symlink) => super::fs::sys_symlink(args.0, args.1), Some(SyscallNumber::Readlink) => super::fs::sys_readlink(args.0, args.1, args.2), Some(SyscallNumber::Mknod) => super::fifo::sys_mknod(args.0, args.1 as u32, args.2), + // *at variants (ARM64 Linux has no legacy syscalls; x86_64 also supports these) + Some(SyscallNumber::Openat) => super::fs::sys_openat(args.0 as i32, args.1, args.2 as u32, args.3 as u32), + Some(SyscallNumber::Faccessat) => super::fs::sys_faccessat(args.0 as i32, args.1, args.2 as u32, args.3 as u32), + Some(SyscallNumber::Mkdirat) => super::fs::sys_mkdirat(args.0 as i32, args.1, args.2 as u32), + Some(SyscallNumber::Mknodat) => super::fs::sys_mknodat(args.0 as i32, args.1, args.2 as u32, args.3), + Some(SyscallNumber::Unlinkat) => super::fs::sys_unlinkat(args.0 as i32, args.1, args.2 as i32), + Some(SyscallNumber::Symlinkat) => super::fs::sys_symlinkat(args.0, args.1 as i32, args.2), + Some(SyscallNumber::Linkat) => super::fs::sys_linkat(args.0 as i32, args.1, args.2 as i32, args.3, args.4 as i32), + Some(SyscallNumber::Renameat) => super::fs::sys_renameat(args.0 as i32, args.1, args.2 as i32, args.3), + Some(SyscallNumber::Readlinkat) => super::fs::sys_readlinkat(args.0 as i32, args.1, args.2, args.3), + Some(SyscallNumber::Dup3) => super::handlers::sys_dup2(args.0, args.1), + Some(SyscallNumber::Pselect6) => super::handlers::sys_select(args.0 as i32, args.1, args.2, args.3, args.4), Some(SyscallNumber::CowStats) => super::handlers::sys_cow_stats(args.0), Some(SyscallNumber::SimulateOom) => super::handlers::sys_simulate_oom(args.0), // PTY syscalls @@ -389,6 +400,42 @@ pub extern "C" fn rust_syscall_handler(frame: &mut SyscallFrame) { Some(SyscallNumber::Futex) => { super::futex::sys_futex(args.0, args.1 as u32, args.2 as u32, args.3, args.4, 
args.5 as u32) } + // Vectored I/O + Some(SyscallNumber::Readv) => super::iovec::sys_readv(args.0, args.1, args.2), + Some(SyscallNumber::Writev) => super::iovec::sys_writev(args.0, args.1, args.2), + // Stubs for musl libc compatibility + Some(SyscallNumber::Mremap) => SyscallResult::Err(super::errno::ENOMEM as u64), + Some(SyscallNumber::Madvise) => SyscallResult::Ok(0), + Some(SyscallNumber::Ppoll) => super::handlers::sys_ppoll(args.0, args.1, args.2, args.3, args.4), + Some(SyscallNumber::SetRobustList) => SyscallResult::Ok(0), + // arch_prctl - x86_64 TLS setup + Some(SyscallNumber::ArchPrctl) => { + const ARCH_SET_FS: u64 = 0x1002; + const ARCH_GET_FS: u64 = 0x1003; + match args.0 { + ARCH_SET_FS => { + x86_64::registers::model_specific::FsBase::write( + x86_64::VirtAddr::new(args.1), + ); + SyscallResult::Ok(0) + } + ARCH_GET_FS => { + let fs_base = x86_64::registers::model_specific::FsBase::read().as_u64(); + match super::userptr::copy_to_user(args.1 as *mut u64, &fs_base) { + Ok(()) => SyscallResult::Ok(0), + Err(e) => SyscallResult::Err(e), + } + } + _ => SyscallResult::Err(super::errno::EINVAL as u64), + } + } + // Filesystem: newfstatat + Some(SyscallNumber::Newfstatat) => { + super::fs::sys_newfstatat(args.0 as i32, args.1, args.2, args.3 as u32) + } + // GetTime is not mapped on x86_64 (use ClockGetTime instead) + // It only exists in the enum for ARM64 compatibility + Some(SyscallNumber::GetTime) => SyscallResult::Err(super::ErrorCode::NoSys as u64), // Graphics syscalls Some(SyscallNumber::FbInfo) => super::graphics::sys_fbinfo(args.0), Some(SyscallNumber::FbDraw) => super::graphics::sys_fbdraw(args.0), diff --git a/kernel/src/syscall/handlers.rs b/kernel/src/syscall/handlers.rs index f776594b..349e9f3e 100644 --- a/kernel/src/syscall/handlers.rs +++ b/kernel/src/syscall/handlers.rs @@ -2651,18 +2651,34 @@ pub fn sys_waitpid(pid: i64, status_ptr: u64, options: u32) -> SyscallResult { return SyscallResult::Ok(0); } - // Blocking wait - block until 
child terminates - // Mark thread as blocked then enter HLT loop. The timer interrupt will - // see that current thread is blocked and switch to another thread. - // When the child exits, unblock_for_child_exit() puts us back in ready queue. + // Blocking wait: set BlockedOnChildExit FIRST, then re-check child. + // + // CRITICAL: This ordering prevents a lost-wakeup TOCTOU race: + // 1. Set BlockedOnChildExit (now unblock_for_child_exit WILL find us) + // 2. Re-check child state (catches exit during the race window) crate::task::scheduler::with_scheduler(|sched| { sched.block_current_for_child_exit(); }); - // Enable preemption before entering HLT loop so scheduler can switch threads. - // The syscall handler called preempt_disable() at entry, so we balance it here - // to allow context switches while blocked. We must re-disable before returning - // to match the preempt_enable() at syscall exit. + // Re-check child state to close the race window + { + let mg = crate::process::manager(); + if let Some(ref manager) = *mg { + if let Some(child) = manager.get_process(target_pid) { + if let crate::process::ProcessState::Terminated(exit_code) = child.state { + drop(mg); + crate::task::scheduler::with_scheduler(|sched| { + if let Some(thread) = sched.current_thread_mut() { + thread.blocked_in_syscall = false; + thread.set_ready(); + } + }); + return complete_wait(target_pid, exit_code, status_ptr, &children_copy); + } + } + } + } + crate::per_cpu::preempt_enable(); loop { @@ -2691,13 +2707,11 @@ pub fn sys_waitpid(pid: i64, status_ptr: u64, options: u32) -> SyscallResult { if let Some(child) = manager.get_process(target_pid) { if let crate::process::ProcessState::Terminated(exit_code) = child.state { drop(manager_guard); - // Re-disable preemption before returning to balance syscall exit's preempt_enable() crate::per_cpu::preempt_disable(); return complete_wait(target_pid, exit_code, status_ptr, &children_copy); } } } - // If not terminated yet (spurious wakeup), 
continue waiting } } @@ -2735,18 +2749,32 @@ pub fn sys_waitpid(pid: i64, status_ptr: u64, options: u32) -> SyscallResult { return SyscallResult::Ok(0); } - // Blocking wait - block until any child terminates - // Mark thread as blocked then enter HLT loop. The timer interrupt will - // see that current thread is blocked and switch to another thread. - // When a child exits, unblock_for_child_exit() puts us back in ready queue. + // Blocking wait: same TOCTOU prevention as the pid>0 path. crate::task::scheduler::with_scheduler(|sched| { sched.block_current_for_child_exit(); }); - // Enable preemption before entering HLT loop so scheduler can switch threads. - // The syscall handler called preempt_disable() at entry, so we balance it here - // to allow context switches while blocked. We must re-disable before returning - // to match the preempt_enable() at syscall exit. + // Re-check all children to close the race window + { + let mg = crate::process::manager(); + if let Some(ref manager) = *mg { + for &child_pid in &children_copy { + if let Some(child) = manager.get_process(child_pid) { + if let crate::process::ProcessState::Terminated(exit_code) = child.state { + drop(mg); + crate::task::scheduler::with_scheduler(|sched| { + if let Some(thread) = sched.current_thread_mut() { + thread.blocked_in_syscall = false; + thread.set_ready(); + } + }); + return complete_wait(child_pid, exit_code, status_ptr, &children_copy); + } + } + } + } + } + crate::per_cpu::preempt_enable(); loop { @@ -2776,14 +2804,12 @@ pub fn sys_waitpid(pid: i64, status_ptr: u64, options: u32) -> SyscallResult { if let Some(child) = manager.get_process(child_pid) { if let crate::process::ProcessState::Terminated(exit_code) = child.state { drop(manager_guard); - // Re-disable preemption before returning to balance syscall exit's preempt_enable() crate::per_cpu::preempt_disable(); return complete_wait(child_pid, exit_code, status_ptr, &children_copy); } } } } - // If no child terminated yet (spurious 
wakeup), continue waiting } } @@ -3377,6 +3403,37 @@ fn poll_ensure_address_space() { } }
+/// sys_ppoll - Poll file descriptors with timespec timeout
+///
+/// Same as poll(), except the timeout is given as a timespec instead of a
+/// millisecond count. The signal-mask arguments are accepted but ignored.
+///
+/// Arguments:
+/// - fds_ptr: Pointer to array of pollfd structures
+/// - nfds: Number of file descriptors to poll
+/// - timeout_ts_ptr: Pointer to timespec (NULL = infinite timeout)
+/// - sigmask: Signal mask pointer (ignored)
+/// - sigsetsize: Size of signal mask (ignored)
+///
+/// Returns EINVAL when the timespec is negative or its `tv_nsec` is not below
+/// one second, the user-copy error when the timespec cannot be fetched, and
+/// otherwise whatever sys_poll returns for the converted millisecond timeout.
+pub fn sys_ppoll(fds_ptr: u64, nfds: u64, timeout_ts_ptr: u64, _sigmask: u64, _sigsetsize: u64) -> SyscallResult {
+    /// Layout of the timespec as it is read from userspace.
+    #[repr(C)]
+    #[derive(Copy, Clone)]
+    struct Timespec {
+        tv_sec: i64,
+        tv_nsec: i64,
+    }
+    const NSEC_PER_SEC: i64 = 1_000_000_000;
+    const NSEC_PER_MSEC: i64 = 1_000_000;
+
+    let timeout_ms: i32 = if timeout_ts_ptr == 0 {
+        -1 // NULL timespec = infinite timeout
+    } else {
+        // Fetch through the userptr copy helper (as the vectored-I/O syscalls
+        // do) rather than dereferencing the raw user-supplied address.
+        let ts: Timespec = match super::userptr::copy_from_user(timeout_ts_ptr as *const Timespec) {
+            Ok(ts) => ts,
+            Err(e) => return SyscallResult::Err(e as u64),
+        };
+        // Reject invalid values up front; a negative timespec would otherwise
+        // reach sys_poll as a negative (or truncated) millisecond timeout.
+        if ts.tv_sec < 0 || ts.tv_nsec < 0 || ts.tv_nsec >= NSEC_PER_SEC {
+            return SyscallResult::Err(super::errno::EINVAL as u64);
+        }
+        // Round the nanosecond part up so a sub-millisecond timeout does not
+        // collapse into a zero (non-blocking) poll. tv_nsec < NSEC_PER_SEC,
+        // so the addition cannot overflow.
+        let sub_ms = (ts.tv_nsec + NSEC_PER_MSEC - 1) / NSEC_PER_MSEC;
+        let ms = ts.tv_sec.saturating_mul(1000).saturating_add(sub_ms);
+        // ms is non-negative here; clamp to the i32 range sys_poll takes.
+        ms.min(i32::MAX as i64) as i32
+    };
+    sys_poll(fds_ptr, nfds, timeout_ms)
+}
+
 /// sys_select - Synchronous I/O multiplexing /// /// This implements the select() syscall which monitors multiple file descriptors diff --git a/kernel/src/syscall/iovec.rs b/kernel/src/syscall/iovec.rs new file mode 100644 index 00000000..09fc0e9c --- /dev/null +++ b/kernel/src/syscall/iovec.rs @@ -0,0 +1,114 @@ +//! Vectored I/O syscalls: readv and writev +//! +//! These syscalls read/write data from/to multiple buffers in a single call. +//! Required by musl libc for stdio operations.
+
+use super::handlers;
+use super::SyscallResult;
+use super::errno;
+use super::userptr::copy_from_user;
+
+/// Maximum number of iovec entries per call (matches Linux UIO_MAXIOV)
+const UIO_MAXIOV: u64 = 1024;
+
+/// iovec structure matching Linux ABI
+#[repr(C)]
+#[derive(Copy, Clone)]
+struct IoVec {
+    iov_base: u64,
+    iov_len: u64,
+}
+
+/// Size in bytes of one `IoVec` entry in the userspace array.
+const IOVEC_SIZE: u64 = core::mem::size_of::<IoVec>() as u64;
+
+/// Shared driver for readv and writev.
+///
+/// Validates `iovcnt` and `iov_ptr`, then walks the iovec array and calls
+/// `transfer(fd, iov_base, iov_len)` once per non-empty entry, summing the
+/// byte counts it reports. The walk stops at the first short transfer.
+///
+/// Any failure that happens after some bytes were already transferred ends
+/// the call with the partial count instead of the error, so the caller never
+/// loses track of I/O that has been performed.
+fn vectored_io(
+    fd: u64,
+    iov_ptr: u64,
+    iovcnt: u64,
+    transfer: impl Fn(u64, u64, u64) -> SyscallResult,
+) -> SyscallResult {
+    if iovcnt == 0 {
+        return SyscallResult::Ok(0);
+    }
+    if iovcnt > UIO_MAXIOV {
+        return SyscallResult::Err(errno::EINVAL as u64);
+    }
+    if iov_ptr == 0 {
+        return SyscallResult::Err(errno::EFAULT as u64);
+    }
+
+    let mut total: u64 = 0;
+    for i in 0..iovcnt {
+        // iov_ptr is caller-controlled: a value near u64::MAX must fail
+        // cleanly rather than overflow the address computation.
+        // (i * IOVEC_SIZE itself cannot overflow because i < UIO_MAXIOV.)
+        let iov: IoVec = match iov_ptr.checked_add(i * IOVEC_SIZE) {
+            Some(addr) => match copy_from_user(addr as *const IoVec) {
+                Ok(v) => v,
+                Err(_) if total > 0 => break,
+                Err(e) => return SyscallResult::Err(e as u64),
+            },
+            None if total > 0 => break,
+            None => return SyscallResult::Err(errno::EFAULT as u64),
+        };
+
+        if iov.iov_len == 0 {
+            continue;
+        }
+
+        match transfer(fd, iov.iov_base, iov.iov_len) {
+            SyscallResult::Ok(n) => {
+                // Saturate instead of overflowing on absurd length sums.
+                total = total.saturating_add(n);
+                // Short transfer (or EOF on read): stop early (like Linux)
+                if n < iov.iov_len {
+                    break;
+                }
+            }
+            // If some data was already transferred, return the partial count
+            SyscallResult::Err(_) if total > 0 => break,
+            SyscallResult::Err(e) => return SyscallResult::Err(e),
+        }
+    }
+    SyscallResult::Ok(total)
+}
+
+/// writev(fd, iov, iovcnt) - Write data from multiple buffers
+///
+/// Writes the buffers described by the `iovcnt` iovec structures at `iov_ptr`
+/// to `fd`, in array order, using `handlers::sys_write` for each one.
+///
+/// Returns the total number of bytes written. Returns EINVAL when `iovcnt`
+/// exceeds UIO_MAXIOV, EFAULT when `iov_ptr` is NULL or the array address
+/// overflows, or the first error encountered if nothing was written yet.
+pub fn sys_writev(fd: u64, iov_ptr: u64, iovcnt: u64) -> SyscallResult {
+    vectored_io(fd, iov_ptr, iovcnt, handlers::sys_write)
+}
+
+/// readv(fd, iov, iovcnt) - Read data into multiple buffers
+///
+/// Reads from `fd` into the buffers described by the `iovcnt` iovec
+/// structures at `iov_ptr`, in array order, using `handlers::sys_read` for
+/// each one.
+///
+/// Returns the total number of bytes read. Returns EINVAL when `iovcnt`
+/// exceeds UIO_MAXIOV, EFAULT when `iov_ptr` is NULL or the array address
+/// overflows, or the first error encountered if nothing was read yet.
+pub fn sys_readv(fd: u64, iov_ptr: u64, iovcnt: u64) -> SyscallResult {
+    vectored_io(fd, iov_ptr, iovcnt, handlers::sys_read)
+}
diff --git a/kernel/src/syscall/mod.rs b/kernel/src/syscall/mod.rs index 3d527774..28e56dc2 100644 --- a/kernel/src/syscall/mod.rs +++ b/kernel/src/syscall/mod.rs @@ -28,6 +28,7 @@ pub mod handler; // - handlers is shared across architectures (arch-specific parts are cfg-gated internally) #[cfg(target_arch = "x86_64")] pub(crate) mod dispatcher; +pub mod iovec; pub mod clone; pub mod fifo; pub mod fs; @@ -45,119 +46,148 @@ pub mod socket; #[cfg(target_arch = "aarch64")] pub mod wait; -/// System call numbers following Linux conventions +/// System call numbers - semantic names only. +/// +/// The numeric mapping is architecture-specific and handled by `from_u64()`. +/// x86_64 uses Linux x86_64 ABI numbers for musl libc compatibility. +/// ARM64 uses Linux ARM64 (asm-generic) numbers for musl libc compatibility.
#[derive(Debug, Clone, Copy, PartialEq, Eq)] -#[repr(u64)] #[allow(dead_code)] pub enum SyscallNumber { - Exit = 0, - Write = 1, - Read = 2, - Yield = 3, // Note: Linux uses sched_yield = 24, but we use 3 - GetTime = 4, - Fork = 5, - Close = 6, // Custom number (Linux close = 3, conflicts with our Yield) - Poll = 7, // Linux syscall number for poll - Mmap = 9, // Linux syscall number for mmap - Mprotect = 10, // Linux syscall number for mprotect - Munmap = 11, // Linux syscall number for munmap - Brk = 12, // Linux syscall number for brk (heap management) - Sigaction = 13, // Linux syscall number for rt_sigaction - Sigprocmask = 14, // Linux syscall number for rt_sigprocmask - Sigreturn = 15, // Linux syscall number for rt_sigreturn - Ioctl = 16, // Linux syscall number for ioctl - Pipe = 22, // Linux syscall number for pipe - Select = 23, // Linux syscall number for select - Dup = 32, // Linux syscall number for dup - Dup2 = 33, // Linux syscall number for dup2 - Pause = 34, // Linux syscall number for pause - Nanosleep = 35, // Linux syscall number for nanosleep - Getitimer = 36, // Linux syscall number for getitimer - Alarm = 37, // Linux syscall number for alarm - Setitimer = 38, // Linux syscall number for setitimer - Fcntl = 72, // Linux syscall number for fcntl - GetPid = 39, // Linux syscall number for getpid - Socket = 41, // Linux syscall number for socket - Connect = 42, // Linux syscall number for connect - Accept = 43, // Linux syscall number for accept - SendTo = 44, // Linux syscall number for sendto - RecvFrom = 45, // Linux syscall number for recvfrom - Shutdown = 48, // Linux syscall number for shutdown - Bind = 49, // Linux syscall number for bind - Listen = 50, // Linux syscall number for listen - Socketpair = 53, // Linux syscall number for socketpair - Exec = 59, // Linux syscall number for execve - Wait4 = 61, // Linux syscall number for wait4/waitpid - Kill = 62, // Linux syscall number for kill - Getsockname = 51, // Linux syscall number 
for getsockname - Getpeername = 52, // Linux syscall number for getpeername - Setsockopt = 54, // Linux syscall number for setsockopt - Clone = 56, // Linux syscall number for clone - Getsockopt = 55, // Linux syscall number for getsockopt - SetPgid = 109, // Linux syscall number for setpgid - Getppid = 110, // Linux syscall number for getppid - SetSid = 112, // Linux syscall number for setsid - GetPgid = 121, // Linux syscall number for getpgid - GetSid = 124, // Linux syscall number for getsid - Sigpending = 127, // Linux syscall number for rt_sigpending - Sigsuspend = 130, // Linux syscall number for rt_sigsuspend - Sigaltstack = 131, // Linux syscall number for sigaltstack - GetTid = 186, // Linux syscall number for gettid - SetTidAddress = 218, // Linux syscall number for set_tid_address - ClockGetTime = 228, // Linux syscall number for clock_gettime - ExitGroup = 231, // Linux syscall number for exit_group - Pipe2 = 293, // Linux syscall number for pipe2 - Futex = 202, // Linux syscall number for futex - GetRandom = 318, // Linux syscall number for getrandom + // Core syscalls + Exit, + Write, + Read, + Yield, + GetTime, // Legacy: ARM64 only (x86_64 uses ClockGetTime) + Fork, + Close, + Poll, + Mmap, + Mprotect, + Munmap, + Brk, + Sigaction, + Sigprocmask, + Sigreturn, + Ioctl, + Readv, // Vectored read (musl stdio) + Writev, // Vectored write (musl stdio) + Pipe, + Select, + Mremap, // Stub: returns -ENOMEM + Madvise, // Stub: returns 0 (advisory) + Dup, + Dup2, + Pause, + Nanosleep, + Getitimer, + Alarm, + Setitimer, + Fcntl, + GetPid, + Socket, + Connect, + Accept, + SendTo, + RecvFrom, + Shutdown, + Bind, + Listen, + Socketpair, + Exec, + Wait4, + Kill, + Getsockname, + Getpeername, + Setsockopt, + Clone, + Getsockopt, + SetPgid, + Getppid, + SetSid, + GetPgid, + GetSid, + Sigpending, + Sigsuspend, + Sigaltstack, + ArchPrctl, // x86_64 TLS setup (FS/GS base) + GetTid, + Futex, + SetTidAddress, + ClockGetTime, + ExitGroup, + Ppoll, // Stub: returns 
-ENOSYS + SetRobustList, // Stub: returns 0 + Pipe2, + GetRandom, // Filesystem syscalls - Access = 21, // Linux syscall number for access - Getcwd = 79, // Linux syscall number for getcwd - Chdir = 80, // Linux syscall number for chdir - Rename = 82, // Linux syscall number for rename - Mkdir = 83, // Linux syscall number for mkdir - Rmdir = 84, // Linux syscall number for rmdir - Link = 86, // Linux syscall number for link (hard links) - Unlink = 87, // Linux syscall number for unlink - Symlink = 88, // Linux syscall number for symlink - Readlink = 89, // Linux syscall number for readlink - Mknod = 133, // Linux syscall number for mknod (used for mkfifo) - Open = 257, // Breenix: filesystem open syscall - Lseek = 258, // Breenix: filesystem lseek syscall - Fstat = 259, // Breenix: filesystem fstat syscall - Getdents64 = 260, // Breenix: directory listing syscall + Access, + Getcwd, + Chdir, + Rename, + Mkdir, + Rmdir, + Link, + Unlink, + Symlink, + Readlink, + Mknod, + Open, + Lseek, + Fstat, + Getdents64, + Newfstatat, // Path-based file stat (AT_FDCWD support) + // *at variants (Linux ARM64 has these instead of legacy syscalls) + Openat, // openat(dirfd, path, flags, mode) - replacement for open + Dup3, // dup3(oldfd, newfd, flags) - replacement for dup2 + Faccessat, // faccessat(dirfd, path, mode, flags) + Mkdirat, // mkdirat(dirfd, path, mode) + Mknodat, // mknodat(dirfd, path, mode, dev) + Unlinkat, // unlinkat(dirfd, path, flags) - replaces unlink + rmdir + Symlinkat, // symlinkat(target, dirfd, linkpath) + Linkat, // linkat(olddirfd, oldpath, newdirfd, newpath, flags) + Renameat, // renameat(olddirfd, oldpath, newdirfd, newpath) + Readlinkat, // readlinkat(dirfd, path, buf, bufsiz) + Pselect6, // pselect6(nfds, readfds, writefds, exceptfds, timeout, sigmask) // PTY syscalls (Breenix-specific numbers) - PosixOpenpt = 400, // Breenix: open PTY master - Grantpt = 401, // Breenix: grant access to PTY slave - Unlockpt = 402, // Breenix: unlock PTY slave - 
Ptsname = 403, // Breenix: get PTY slave path + PosixOpenpt, + Grantpt, + Unlockpt, + Ptsname, // Graphics syscalls (Breenix-specific) - FbInfo = 410, // Breenix: get framebuffer info - FbDraw = 411, // Breenix: draw to framebuffer (left pane) - FbMmap = 412, // Breenix: mmap framebuffer into userspace - GetMousePos = 413, // Breenix: get mouse cursor position + FbInfo, + FbDraw, + FbMmap, + GetMousePos, // Audio syscalls (Breenix-specific) - AudioInit = 420, // Breenix: initialize audio stream - AudioWrite = 421, // Breenix: write PCM data to audio device + AudioInit, + AudioWrite, // Display takeover (Breenix-specific) - TakeOverDisplay = 431, // Breenix: userspace takes over display from kernel terminal manager - GiveBackDisplay = 432, // Breenix: userspace gives display back to kernel terminal manager - CowStats = 500, // Breenix: get Copy-on-Write statistics (for testing) - SimulateOom = 501, // Breenix: enable/disable OOM simulation (for testing) + TakeOverDisplay, + GiveBackDisplay, + // Testing (Breenix-specific) + CowStats, + SimulateOom, } #[allow(dead_code)] impl SyscallNumber { - /// Try to convert a u64 to a SyscallNumber + /// Try to convert a raw syscall number to a SyscallNumber. + /// + /// x86_64: Uses Linux x86_64 ABI numbers for musl libc compatibility. + /// ARM64: Uses legacy Breenix numbers (ARM64 Linux renumbering is future work). 
+ #[cfg(target_arch = "x86_64")] pub fn from_u64(value: u64) -> Option { match value { - 0 => Some(Self::Exit), + // Linux x86_64 ABI numbers + 0 => Some(Self::Read), // was Breenix Exit=0 1 => Some(Self::Write), - 2 => Some(Self::Read), - 3 => Some(Self::Yield), - 4 => Some(Self::GetTime), - 5 => Some(Self::Fork), - 6 => Some(Self::Close), + 2 => Some(Self::Open), // Linux x86_64 open + 3 => Some(Self::Close), // was Breenix Yield=3 + 5 => Some(Self::Fstat), // was Breenix Fork=5 7 => Some(Self::Poll), + 8 => Some(Self::Lseek), // was Breenix 258 9 => Some(Self::Mmap), 10 => Some(Self::Mprotect), 11 => Some(Self::Munmap), @@ -166,8 +196,14 @@ impl SyscallNumber { 14 => Some(Self::Sigprocmask), 15 => Some(Self::Sigreturn), 16 => Some(Self::Ioctl), + 19 => Some(Self::Readv), // NEW + 20 => Some(Self::Writev), // NEW + 21 => Some(Self::Access), 22 => Some(Self::Pipe), 23 => Some(Self::Select), + 24 => Some(Self::Yield), // was Breenix 3 + 25 => Some(Self::Mremap), // NEW stub + 28 => Some(Self::Madvise), // NEW stub 32 => Some(Self::Dup), 33 => Some(Self::Dup2), 34 => Some(Self::Pause), @@ -176,7 +212,6 @@ impl SyscallNumber { 37 => Some(Self::Alarm), 38 => Some(Self::Setitimer), 39 => Some(Self::GetPid), - 72 => Some(Self::Fcntl), 41 => Some(Self::Socket), 42 => Some(Self::Connect), 43 => Some(Self::Accept), @@ -191,9 +226,21 @@ impl SyscallNumber { 54 => Some(Self::Setsockopt), 55 => Some(Self::Getsockopt), 56 => Some(Self::Clone), + 57 => Some(Self::Fork), // was Breenix 5 59 => Some(Self::Exec), + 60 => Some(Self::Exit), // was Breenix 0 61 => Some(Self::Wait4), 62 => Some(Self::Kill), + 72 => Some(Self::Fcntl), + 79 => Some(Self::Getcwd), + 80 => Some(Self::Chdir), + 82 => Some(Self::Rename), + 83 => Some(Self::Mkdir), + 84 => Some(Self::Rmdir), + 86 => Some(Self::Link), + 87 => Some(Self::Unlink), + 88 => Some(Self::Symlink), + 89 => Some(Self::Readlink), 109 => Some(Self::SetPgid), 110 => Some(Self::Getppid), 112 => Some(Self::SetSid), @@ -202,40 +249,154 @@ 
impl SyscallNumber { 127 => Some(Self::Sigpending), 130 => Some(Self::Sigsuspend), 131 => Some(Self::Sigaltstack), + 133 => Some(Self::Mknod), + 158 => Some(Self::ArchPrctl), // NEW 186 => Some(Self::GetTid), 202 => Some(Self::Futex), + 217 => Some(Self::Getdents64), // was Breenix 260 218 => Some(Self::SetTidAddress), 228 => Some(Self::ClockGetTime), 231 => Some(Self::ExitGroup), + 257 => Some(Self::Openat), // Linux x86_64 openat (was Breenix Open) + 258 => Some(Self::Mkdirat), + 259 => Some(Self::Mknodat), + 262 => Some(Self::Newfstatat), // NEW + 263 => Some(Self::Unlinkat), + 264 => Some(Self::Renameat), + 265 => Some(Self::Linkat), + 266 => Some(Self::Symlinkat), + 267 => Some(Self::Readlinkat), + 269 => Some(Self::Faccessat), + 270 => Some(Self::Pselect6), + 271 => Some(Self::Ppoll), // NEW stub + 273 => Some(Self::SetRobustList), // NEW stub + 292 => Some(Self::Dup3), 293 => Some(Self::Pipe2), 318 => Some(Self::GetRandom), - // Filesystem syscalls - 21 => Some(Self::Access), - 79 => Some(Self::Getcwd), - 80 => Some(Self::Chdir), - 82 => Some(Self::Rename), - 83 => Some(Self::Mkdir), - 84 => Some(Self::Rmdir), - 86 => Some(Self::Link), - 87 => Some(Self::Unlink), - 88 => Some(Self::Symlink), - 89 => Some(Self::Readlink), - 133 => Some(Self::Mknod), - 257 => Some(Self::Open), - 258 => Some(Self::Lseek), - 259 => Some(Self::Fstat), - 260 => Some(Self::Getdents64), - // PTY syscalls + // PTY syscalls (Breenix-specific, same on both archs) + 400 => Some(Self::PosixOpenpt), + 401 => Some(Self::Grantpt), + 402 => Some(Self::Unlockpt), + 403 => Some(Self::Ptsname), + // Graphics syscalls (Breenix-specific) + 410 => Some(Self::FbInfo), + 411 => Some(Self::FbDraw), + 412 => Some(Self::FbMmap), + 413 => Some(Self::GetMousePos), + // Audio syscalls (Breenix-specific) + 420 => Some(Self::AudioInit), + 421 => Some(Self::AudioWrite), + 431 => Some(Self::TakeOverDisplay), + 432 => Some(Self::GiveBackDisplay), + 500 => Some(Self::CowStats), + 501 => Some(Self::SimulateOom), 
+ _ => None, + } + } + + /// ARM64: Uses Linux ARM64 (asm-generic/unistd.h) numbers for musl compatibility. + #[cfg(target_arch = "aarch64")] + pub fn from_u64(value: u64) -> Option { + match value { + // Linux ARM64 generic syscall numbers (from asm-generic/unistd.h) + // I/O + 17 => Some(Self::Getcwd), + 23 => Some(Self::Dup), + 24 => Some(Self::Dup3), + 25 => Some(Self::Fcntl), + 29 => Some(Self::Ioctl), + // Filesystem *at variants (ARM64 has no legacy open/mkdir/etc.) + 33 => Some(Self::Mknodat), + 34 => Some(Self::Mkdirat), + 35 => Some(Self::Unlinkat), + 36 => Some(Self::Symlinkat), + 37 => Some(Self::Linkat), + 38 => Some(Self::Renameat), + 48 => Some(Self::Faccessat), + 49 => Some(Self::Chdir), + 56 => Some(Self::Openat), + 57 => Some(Self::Close), + 59 => Some(Self::Pipe2), + 61 => Some(Self::Getdents64), + 62 => Some(Self::Lseek), + 63 => Some(Self::Read), + 64 => Some(Self::Write), + 65 => Some(Self::Readv), + 66 => Some(Self::Writev), + // I/O multiplexing + 72 => Some(Self::Pselect6), + 73 => Some(Self::Ppoll), + 78 => Some(Self::Readlinkat), + 79 => Some(Self::Newfstatat), + 80 => Some(Self::Fstat), + // Process management + 93 => Some(Self::Exit), + 94 => Some(Self::ExitGroup), + 96 => Some(Self::SetTidAddress), + 98 => Some(Self::Futex), + 99 => Some(Self::SetRobustList), + // Timers + 101 => Some(Self::Nanosleep), + 102 => Some(Self::Getitimer), + 103 => Some(Self::Setitimer), + 113 => Some(Self::ClockGetTime), + // Scheduling + 124 => Some(Self::Yield), + // Signals + 129 => Some(Self::Kill), + 132 => Some(Self::Sigaltstack), + 133 => Some(Self::Sigsuspend), + 134 => Some(Self::Sigaction), + 135 => Some(Self::Sigprocmask), + 136 => Some(Self::Sigpending), + 139 => Some(Self::Sigreturn), + // Session/process group + 154 => Some(Self::SetPgid), + 155 => Some(Self::GetPgid), + 156 => Some(Self::GetSid), + 157 => Some(Self::SetSid), + // Process info + 172 => Some(Self::GetPid), + 173 => Some(Self::Getppid), + 178 => Some(Self::GetTid), + // Socket + 
198 => Some(Self::Socket), + 199 => Some(Self::Socketpair), + 200 => Some(Self::Bind), + 201 => Some(Self::Listen), + 202 => Some(Self::Accept), + 203 => Some(Self::Connect), + 204 => Some(Self::Getsockname), + 205 => Some(Self::Getpeername), + 206 => Some(Self::SendTo), + 207 => Some(Self::RecvFrom), + 208 => Some(Self::Setsockopt), + 209 => Some(Self::Getsockopt), + 210 => Some(Self::Shutdown), + // Memory + 214 => Some(Self::Brk), + 215 => Some(Self::Munmap), + 216 => Some(Self::Mremap), + 220 => Some(Self::Clone), + 221 => Some(Self::Exec), + 222 => Some(Self::Mmap), + 226 => Some(Self::Mprotect), + 233 => Some(Self::Madvise), + // Wait + 260 => Some(Self::Wait4), + // Random + 278 => Some(Self::GetRandom), + // PTY syscalls (Breenix-specific, same on both archs) 400 => Some(Self::PosixOpenpt), 401 => Some(Self::Grantpt), 402 => Some(Self::Unlockpt), 403 => Some(Self::Ptsname), - // Graphics syscalls + // Graphics syscalls (Breenix-specific) 410 => Some(Self::FbInfo), 411 => Some(Self::FbDraw), 412 => Some(Self::FbMmap), 413 => Some(Self::GetMousePos), - // Audio syscalls + // Audio syscalls (Breenix-specific) 420 => Some(Self::AudioInit), 421 => Some(Self::AudioWrite), 431 => Some(Self::TakeOverDisplay), diff --git a/kernel/src/syscall/wait.rs b/kernel/src/syscall/wait.rs index 5b007418..6b7acefe 100644 --- a/kernel/src/syscall/wait.rs +++ b/kernel/src/syscall/wait.rs @@ -144,11 +144,40 @@ pub fn sys_waitpid(pid: i64, status_ptr: u64, options: u32) -> SyscallResult { return SyscallResult::Ok(0); } - // Blocking wait + // Blocking wait: mark ourselves as BlockedOnChildExit FIRST, then re-check. + // + // CRITICAL: This ordering prevents a lost-wakeup TOCTOU race: + // 1. Set BlockedOnChildExit (now unblock_for_child_exit WILL find us) + // 2. 
Re-check child state (catches exit that happened during the window + // between our first check above and step 1) + // + // If the child exits BEFORE step 1: step 2 catches it (self-unblock + return) + // If the child exits AFTER step 1: unblock_for_child_exit succeeds (we're blocked) + // If the child exits DURING step 1: scheduler lock serializes the operations crate::task::scheduler::with_scheduler(|sched| { sched.block_current_for_child_exit(); }); + // Re-check child state to close the race window + { + let mg = crate::process::manager(); + if let Some(ref manager) = *mg { + if let Some(child) = manager.get_process(target_pid) { + if let crate::process::ProcessState::Terminated(exit_code) = child.state { + drop(mg); + // Child exited during the race window — self-unblock and return + crate::task::scheduler::with_scheduler(|sched| { + if let Some(thread) = sched.current_thread_mut() { + thread.blocked_in_syscall = false; + thread.set_ready(); + } + }); + return complete_wait(target_pid, exit_code, status_ptr); + } + } + } + } + crate::per_cpu::preempt_enable(); loop { @@ -214,10 +243,34 @@ pub fn sys_waitpid(pid: i64, status_ptr: u64, options: u32) -> SyscallResult { return SyscallResult::Ok(0); } + // Blocking wait: same TOCTOU prevention as the pid>0 path above. + // Set BlockedOnChildExit FIRST, then re-check all children. 
crate::task::scheduler::with_scheduler(|sched| { sched.block_current_for_child_exit(); }); + // Re-check all children to close the race window + { + let mg = crate::process::manager(); + if let Some(ref manager) = *mg { + for &child_pid in &children_copy { + if let Some(child) = manager.get_process(child_pid) { + if let crate::process::ProcessState::Terminated(exit_code) = child.state { + drop(mg); + // Child exited during the race window — self-unblock and return + crate::task::scheduler::with_scheduler(|sched| { + if let Some(thread) = sched.current_thread_mut() { + thread.blocked_in_syscall = false; + thread.set_ready(); + } + }); + return complete_wait(child_pid, exit_code, status_ptr); + } + } + } + } + } + crate::per_cpu::preempt_enable(); loop { diff --git a/kernel/src/task/process_task.rs b/kernel/src/task/process_task.rs index 2204d35f..53fc7a53 100644 --- a/kernel/src/task/process_task.rs +++ b/kernel/src/task/process_task.rs @@ -3,66 +3,133 @@ //! This module bridges the gap between the Process Manager and the Task Scheduler, //! allowing processes to be scheduled as tasks. +use crate::ipc::fd::FileDescriptor; use crate::process::ProcessId; use crate::task::scheduler; use crate::task::thread::{Thread, ThreadPrivilege}; +/// Close extracted file descriptor entries outside the PM lock. +/// +/// This performs the same cleanup as Process::close_all_fds() but operates on +/// a Vec of entries that were extracted from the FD table under PM lock via +/// Process::take_fd_entries(). This avoids holding PM lock during pipe wakeups, +/// PTY refcounting, TCP close, etc. +/// +/// CRITICAL: No PM lock is held when this runs. 
+fn close_extracted_fds(entries: alloc::vec::Vec<(usize, FileDescriptor)>) { + use crate::ipc::FdKind; + + for (_fd, fd_entry) in entries { + match fd_entry.kind { + FdKind::PipeRead(buffer) => { buffer.lock().close_read(); } + FdKind::PipeWrite(buffer) => { buffer.lock().close_write(); } + FdKind::TcpListener(port) => { crate::net::tcp::tcp_listener_ref_dec(port); } + FdKind::TcpConnection(conn_id) => { let _ = crate::net::tcp::tcp_close(&conn_id); } + FdKind::PtyMaster(pty_num) => { + if let Some(pair) = crate::tty::pty::get(pty_num) { + let old_count = pair.master_refcount.fetch_sub(1, core::sync::atomic::Ordering::SeqCst); + if old_count == 1 { + crate::tty::pty::release(pty_num); + } + } + } + FdKind::PtySlave(pty_num) => { + if let Some(pair) = crate::tty::pty::get(pty_num) { + pair.slave_close(); + } + } + FdKind::UnixStream(socket) => { socket.lock().close(); } + FdKind::FifoRead(path, buffer) => { + crate::ipc::fifo::close_fifo_read(&path); + buffer.lock().close_read(); + } + FdKind::FifoWrite(path, buffer) => { + crate::ipc::fifo::close_fifo_write(&path); + buffer.lock().close_write(); + } + _ => {} // StdIo, RegularFile, Directory, Device, etc. — no action needed + } + } +} + /// Integration functions for scheduling processes as tasks pub struct ProcessScheduler; impl ProcessScheduler { - /// Handle process exit from scheduler context - /// Called when a userspace thread exits + /// Handle process exit from scheduler context. + /// + /// Two-phase design to minimize PM lock hold time and prevent deadlocks: + /// + /// Phase 1 (under PM lock): Mark process terminated, extract FD entries, + /// set SIGCHLD on parent, collect parent thread ID for wakeup. + /// No logging, no pipe wakeups, no scheduler calls. + /// + /// Phase 2 (no PM lock): Close extracted FDs (pipe wakeups, PTY cleanup), + /// wake parent thread via scheduler, log the exit. 
+ /// + /// This prevents a system-wide hang on ARM64 SMP where the PM lock (acquired + /// with interrupts disabled on all CPUs) combined with logging (which acquires + /// SERIAL and framebuffer locks) creates an unbreakable deadlock. pub fn handle_thread_exit(thread_id: u64, exit_code: i32) { - log::debug!("Thread {} exited with code {}", thread_id, exit_code); - - // Find which process this thread belongs to - if let Some(ref mut manager) = *crate::process::manager() { - if let Some((pid, process)) = manager.find_process_by_thread_mut(thread_id) { - log::info!( - "Process {} (thread {}) exited with code {}", - pid.as_u64(), - thread_id, - exit_code - ); - - // Get parent PID before terminating (needed for SIGCHLD) - let parent_pid = process.parent; - - process.terminate(exit_code); - - #[cfg(feature = "btrt")] - crate::test_framework::btrt::on_process_exit(pid.as_u64(), exit_code); - - // Send SIGCHLD to the parent process and wake it if blocked on waitpid - if let Some(parent_pid) = parent_pid { - if let Some(parent_process) = manager.get_process_mut(parent_pid) { - use crate::signal::constants::SIGCHLD; - parent_process.signals.set_pending(SIGCHLD); - - // Get parent's main thread ID to wake it if blocked on waitpid - let parent_thread_id = parent_process.main_thread.as_ref().map(|t| t.id); - - log::debug!( - "Sent SIGCHLD to parent process {} for child {} exit", - parent_pid.as_u64(), - pid.as_u64() - ); - - // Wake up the parent thread if it's blocked on waitpid or pause() - if let Some(parent_tid) = parent_thread_id { - scheduler::with_scheduler(|sched| { - // Wake if blocked on waitpid (BlockedOnChildExit) - sched.unblock_for_child_exit(parent_tid); - // Also wake if blocked on pause() or other signal wait (BlockedOnSignal) - sched.unblock_for_signal(parent_tid); - }); + // Phase 1: Under PM lock — minimal work only + let phase1_result = { + if let Some(ref mut manager) = *crate::process::manager() { + if let Some((pid, process)) = 
manager.find_process_by_thread_mut(thread_id) { + let parent_pid = process.parent; + let process_name = process.name.clone(); + + // Mark terminated and extract FDs without closing them + process.terminate_minimal(exit_code); + let fd_entries = process.take_fd_entries(); + // CoW cleanup is fast (no logging, no locks besides frame allocator) + process.cleanup_cow_frames(); + process.drain_old_page_tables(); + + #[cfg(feature = "btrt")] + crate::test_framework::btrt::on_process_exit(pid.as_u64(), exit_code); + + // Set SIGCHLD on parent and get parent thread ID for wakeup + let parent_tid = if let Some(parent_pid) = parent_pid { + if let Some(parent_process) = manager.get_process_mut(parent_pid) { + use crate::signal::constants::SIGCHLD; + parent_process.signals.set_pending(SIGCHLD); + parent_process.main_thread.as_ref().map(|t| t.id) + } else { + None } - } - } + } else { + None + }; - // TODO: Clean up process resources + Some((pid, process_name, fd_entries, parent_tid)) + } else { + None + } + } else { + None } + }; // PM lock dropped here + + // Phase 2: No PM lock — safe to do pipe wakeups, scheduler calls, logging + if let Some((pid, process_name, fd_entries, parent_tid)) = phase1_result { + // Close FDs outside PM lock (pipe close_write wakes readers, etc.) 
+ close_extracted_fds(fd_entries); + + // Wake parent thread if blocked on waitpid or pause() + if let Some(parent_tid) = parent_tid { + scheduler::with_scheduler(|sched| { + sched.unblock_for_child_exit(parent_tid); + sched.unblock_for_signal(parent_tid); + }); + } + + log::debug!( + "Process {} '{}' (thread {}) exited with code {}", + pid.as_u64(), + process_name, + thread_id, + exit_code + ); } } diff --git a/kernel/src/test_framework/registry.rs b/kernel/src/test_framework/registry.rs index 47b309e3..746f4d92 100644 --- a/kernel/src/test_framework/registry.rs +++ b/kernel/src/test_framework/registry.rs @@ -2866,35 +2866,57 @@ fn test_arm64_signal_frame_conversion() -> TestResult { fn test_syscall_dispatch() -> TestResult { use crate::syscall::SyscallNumber; - // Test that we can convert known syscall numbers - // These are fundamental syscalls that should always exist + // Test that we can convert known syscall numbers. + // Numbers differ per architecture (Linux ABI): + // x86_64: exit=60, write=1, read=0, getpid=39 + // ARM64: exit=93, write=64, read=63, getpid=172 - // Test SYS_exit (0) - match SyscallNumber::from_u64(0) { - Some(SyscallNumber::Exit) => {} - Some(_) => return TestResult::Fail("syscall 0 should be Exit"), - None => return TestResult::Fail("syscall 0 not recognized"), - } - - // Test SYS_write (1) - match SyscallNumber::from_u64(1) { - Some(SyscallNumber::Write) => {} - Some(_) => return TestResult::Fail("syscall 1 should be Write"), - None => return TestResult::Fail("syscall 1 not recognized"), - } - - // Test SYS_read (2) - match SyscallNumber::from_u64(2) { - Some(SyscallNumber::Read) => {} - Some(_) => return TestResult::Fail("syscall 2 should be Read"), - None => return TestResult::Fail("syscall 2 not recognized"), + #[cfg(target_arch = "x86_64")] + { + match SyscallNumber::from_u64(60) { + Some(SyscallNumber::Exit) => {} + Some(_) => return TestResult::Fail("syscall 60 should be Exit"), + None => return TestResult::Fail("syscall 60 not 
recognized"), + } + match SyscallNumber::from_u64(1) { + Some(SyscallNumber::Write) => {} + Some(_) => return TestResult::Fail("syscall 1 should be Write"), + None => return TestResult::Fail("syscall 1 not recognized"), + } + match SyscallNumber::from_u64(0) { + Some(SyscallNumber::Read) => {} + Some(_) => return TestResult::Fail("syscall 0 should be Read"), + None => return TestResult::Fail("syscall 0 not recognized"), + } + match SyscallNumber::from_u64(39) { + Some(SyscallNumber::GetPid) => {} + Some(_) => return TestResult::Fail("syscall 39 should be GetPid"), + None => return TestResult::Fail("syscall 39 not recognized"), + } } - // Test SYS_getpid (39) - match SyscallNumber::from_u64(39) { - Some(SyscallNumber::GetPid) => {} - Some(_) => return TestResult::Fail("syscall 39 should be GetPid"), - None => return TestResult::Fail("syscall 39 not recognized"), + #[cfg(target_arch = "aarch64")] + { + match SyscallNumber::from_u64(93) { + Some(SyscallNumber::Exit) => {} + Some(_) => return TestResult::Fail("syscall 93 should be Exit"), + None => return TestResult::Fail("syscall 93 not recognized"), + } + match SyscallNumber::from_u64(64) { + Some(SyscallNumber::Write) => {} + Some(_) => return TestResult::Fail("syscall 64 should be Write"), + None => return TestResult::Fail("syscall 64 not recognized"), + } + match SyscallNumber::from_u64(63) { + Some(SyscallNumber::Read) => {} + Some(_) => return TestResult::Fail("syscall 63 should be Read"), + None => return TestResult::Fail("syscall 63 not recognized"), + } + match SyscallNumber::from_u64(172) { + Some(SyscallNumber::GetPid) => {} + Some(_) => return TestResult::Fail("syscall 172 should be GetPid"), + None => return TestResult::Fail("syscall 172 not recognized"), + } } // Test that invalid syscall numbers return None diff --git a/libs/libbreenix-libc/src/lib.rs b/libs/libbreenix-libc/src/lib.rs index 468fe5b5..caa3705e 100644 --- a/libs/libbreenix-libc/src/lib.rs +++ b/libs/libbreenix-libc/src/lib.rs @@ -27,6 
+27,13 @@ use libbreenix::types::Fd; use libbreenix::error::Error; use core::slice; +/// AT_FDCWD as raw u64 for direct syscall use (-100 as u64) +#[cfg(target_arch = "aarch64")] +const AT_FDCWD_RAW: u64 = (-100i32) as u32 as u64; +/// AT_REMOVEDIR flag for unlinkat +#[cfg(target_arch = "aarch64")] +const AT_REMOVEDIR: u64 = 0x200; + // ============================================================================= // Panic Handler // ============================================================================= @@ -285,19 +292,31 @@ pub unsafe extern "C" fn open(path: *const u8, flags: i32, mode: u32) -> i32 { return -1; } - let result = libbreenix::raw::syscall3( - libbreenix::syscall::nr::OPEN, - path as u64, - flags as u64, - mode as u64, - ) as i64; + let result = { + #[cfg(target_arch = "x86_64")] + { + libbreenix::raw::syscall3( + libbreenix::syscall::nr::OPEN, + path as u64, + flags as u64, + mode as u64, + ) as i64 + } + #[cfg(target_arch = "aarch64")] + { + libbreenix::raw::syscall4( + libbreenix::syscall::nr::OPENAT, + AT_FDCWD_RAW, + path as u64, + flags as u64, + mode as u64, + ) as i64 + } + }; syscall_result_to_c_int(result) } /// openat - open a file relative to a directory fd -/// -/// If dirfd is AT_FDCWD (-100), delegates to open() with the given path. -/// Otherwise, returns -ENOSYS (not yet supported). 
#[no_mangle] pub unsafe extern "C" fn openat(dirfd: i32, path: *const u8, flags: i32, mode: u32) -> i32 { if path.is_null() { @@ -305,14 +324,14 @@ pub unsafe extern "C" fn openat(dirfd: i32, path: *const u8, flags: i32, mode: u return -1; } - // AT_FDCWD = -100: use current working directory (same as open) - if dirfd == -100 { - return open(path, flags, mode); - } - - // Non-AT_FDCWD dirfd not supported yet - ERRNO = ENOSYS; - -1 + let result = libbreenix::raw::syscall4( + libbreenix::syscall::nr::OPENAT, + dirfd as u64, + path as u64, + flags as u64, + mode as u64, + ) as i64; + syscall_result_to_c_int(result) } /// fstat - get file status by fd @@ -409,12 +428,27 @@ pub unsafe extern "C" fn readlink(path: *const u8, buf: *mut u8, bufsiz: usize) return -1; } - let result = libbreenix::raw::syscall3( - libbreenix::syscall::nr::READLINK, - path as u64, - buf as u64, - bufsiz as u64, - ) as i64; + let result = { + #[cfg(target_arch = "x86_64")] + { + libbreenix::raw::syscall3( + libbreenix::syscall::nr::READLINK, + path as u64, + buf as u64, + bufsiz as u64, + ) as i64 + } + #[cfg(target_arch = "aarch64")] + { + libbreenix::raw::syscall4( + libbreenix::syscall::nr::READLINKAT, + AT_FDCWD_RAW, + path as u64, + buf as u64, + bufsiz as u64, + ) as i64 + } + }; syscall_result_to_c_ssize(result) } @@ -426,10 +460,24 @@ pub unsafe extern "C" fn unlink(path: *const u8) -> i32 { return -1; } - let result = libbreenix::raw::syscall1( - libbreenix::syscall::nr::UNLINK, - path as u64, - ) as i64; + let result = { + #[cfg(target_arch = "x86_64")] + { + libbreenix::raw::syscall1( + libbreenix::syscall::nr::UNLINK, + path as u64, + ) as i64 + } + #[cfg(target_arch = "aarch64")] + { + libbreenix::raw::syscall3( + libbreenix::syscall::nr::UNLINKAT, + AT_FDCWD_RAW, + path as u64, + 0, // flags=0 means unlink (not rmdir) + ) as i64 + } + }; syscall_result_to_c_int(result) } @@ -441,11 +489,26 @@ pub unsafe extern "C" fn rename(oldpath: *const u8, newpath: *const u8) -> i32 { return 
-1; } - let result = libbreenix::raw::syscall2( - libbreenix::syscall::nr::RENAME, - oldpath as u64, - newpath as u64, - ) as i64; + let result = { + #[cfg(target_arch = "x86_64")] + { + libbreenix::raw::syscall2( + libbreenix::syscall::nr::RENAME, + oldpath as u64, + newpath as u64, + ) as i64 + } + #[cfg(target_arch = "aarch64")] + { + libbreenix::raw::syscall4( + libbreenix::syscall::nr::RENAMEAT, + AT_FDCWD_RAW, + oldpath as u64, + AT_FDCWD_RAW, + newpath as u64, + ) as i64 + } + }; syscall_result_to_c_int(result) } @@ -457,11 +520,25 @@ pub unsafe extern "C" fn mkdir(path: *const u8, mode: u32) -> i32 { return -1; } - let result = libbreenix::raw::syscall2( - libbreenix::syscall::nr::MKDIR, - path as u64, - mode as u64, - ) as i64; + let result = { + #[cfg(target_arch = "x86_64")] + { + libbreenix::raw::syscall2( + libbreenix::syscall::nr::MKDIR, + path as u64, + mode as u64, + ) as i64 + } + #[cfg(target_arch = "aarch64")] + { + libbreenix::raw::syscall3( + libbreenix::syscall::nr::MKDIRAT, + AT_FDCWD_RAW, + path as u64, + mode as u64, + ) as i64 + } + }; syscall_result_to_c_int(result) } @@ -473,10 +550,24 @@ pub unsafe extern "C" fn rmdir(path: *const u8) -> i32 { return -1; } - let result = libbreenix::raw::syscall1( - libbreenix::syscall::nr::RMDIR, - path as u64, - ) as i64; + let result = { + #[cfg(target_arch = "x86_64")] + { + libbreenix::raw::syscall1( + libbreenix::syscall::nr::RMDIR, + path as u64, + ) as i64 + } + #[cfg(target_arch = "aarch64")] + { + libbreenix::raw::syscall3( + libbreenix::syscall::nr::UNLINKAT, + AT_FDCWD_RAW, + path as u64, + AT_REMOVEDIR, + ) as i64 + } + }; syscall_result_to_c_int(result) } @@ -488,27 +579,53 @@ pub unsafe extern "C" fn link(oldpath: *const u8, newpath: *const u8) -> i32 { return -1; } - let result = libbreenix::raw::syscall2( - libbreenix::syscall::nr::LINK, - oldpath as u64, - newpath as u64, - ) as i64; + let result = { + #[cfg(target_arch = "x86_64")] + { + libbreenix::raw::syscall2( + 
libbreenix::syscall::nr::LINK, + oldpath as u64, + newpath as u64, + ) as i64 + } + #[cfg(target_arch = "aarch64")] + { + libbreenix::raw::syscall5( + libbreenix::syscall::nr::LINKAT, + AT_FDCWD_RAW, + oldpath as u64, + AT_FDCWD_RAW, + newpath as u64, + 0, // flags + ) as i64 + } + }; syscall_result_to_c_int(result) } /// linkat - create a hard link relative to directory file descriptors -/// -/// We ignore dirfd and flags, treating paths as absolute (Breenix doesn't -/// support AT_FDCWD/AT_ operations yet). This is sufficient for std::fs::hard_link. #[no_mangle] pub unsafe extern "C" fn linkat( - _olddirfd: i32, + olddirfd: i32, oldpath: *const u8, - _newdirfd: i32, + newdirfd: i32, newpath: *const u8, - _flags: i32, + flags: i32, ) -> i32 { - link(oldpath, newpath) + if oldpath.is_null() || newpath.is_null() { + ERRNO = EFAULT; + return -1; + } + + let result = libbreenix::raw::syscall5( + libbreenix::syscall::nr::LINKAT, + olddirfd as u64, + oldpath as u64, + newdirfd as u64, + newpath as u64, + flags as u64, + ) as i64; + syscall_result_to_c_int(result) } /// symlink - create a symbolic link @@ -519,11 +636,25 @@ pub unsafe extern "C" fn symlink(target: *const u8, linkpath: *const u8) -> i32 return -1; } - let result = libbreenix::raw::syscall2( - libbreenix::syscall::nr::SYMLINK, - target as u64, - linkpath as u64, - ) as i64; + let result = { + #[cfg(target_arch = "x86_64")] + { + libbreenix::raw::syscall2( + libbreenix::syscall::nr::SYMLINK, + target as u64, + linkpath as u64, + ) as i64 + } + #[cfg(target_arch = "aarch64")] + { + libbreenix::raw::syscall3( + libbreenix::syscall::nr::SYMLINKAT, + target as u64, + AT_FDCWD_RAW, + linkpath as u64, + ) as i64 + } + }; syscall_result_to_c_int(result) } @@ -535,11 +666,26 @@ pub unsafe extern "C" fn access(path: *const u8, mode: i32) -> i32 { return -1; } - let result = libbreenix::raw::syscall2( - libbreenix::syscall::nr::ACCESS, - path as u64, - mode as u64, - ) as i64; + let result = { + #[cfg(target_arch = 
"x86_64")] + { + libbreenix::raw::syscall2( + libbreenix::syscall::nr::ACCESS, + path as u64, + mode as u64, + ) as i64 + } + #[cfg(target_arch = "aarch64")] + { + libbreenix::raw::syscall4( + libbreenix::syscall::nr::FACCESSAT, + AT_FDCWD_RAW, + path as u64, + mode as u64, + 0, // flags + ) as i64 + } + }; syscall_result_to_c_int(result) } @@ -860,7 +1006,7 @@ pub unsafe extern "C" fn mmap( offset: i64, ) -> *mut u8 { let result = libbreenix::raw::syscall6( - 9, // MMAP syscall number + libbreenix::syscall::nr::MMAP, addr as u64, len as u64, prot as u64, @@ -1776,14 +1922,31 @@ pub unsafe extern "C" fn select( exceptfds: *mut u8, timeout: *mut u8, ) -> i32 { - let result = libbreenix::raw::syscall5( - libbreenix::syscall::nr::SELECT, - nfds as u64, - readfds as u64, - writefds as u64, - exceptfds as u64, - timeout as u64, - ) as i64; + let result = { + #[cfg(target_arch = "x86_64")] + { + libbreenix::raw::syscall5( + libbreenix::syscall::nr::SELECT, + nfds as u64, + readfds as u64, + writefds as u64, + exceptfds as u64, + timeout as u64, + ) as i64 + } + #[cfg(target_arch = "aarch64")] + { + libbreenix::raw::syscall6( + libbreenix::syscall::nr::PSELECT6, + nfds as u64, + readfds as u64, + writefds as u64, + exceptfds as u64, + timeout as u64, + 0, // NULL sigmask + ) as i64 + } + }; syscall_result_to_c_int(result) } @@ -1794,12 +1957,43 @@ pub unsafe extern "C" fn select( /// poll - wait for events on file descriptors #[no_mangle] pub unsafe extern "C" fn poll(fds: *mut u8, nfds: u64, timeout: i32) -> i32 { - let result = libbreenix::raw::syscall3( - libbreenix::syscall::nr::POLL, - fds as u64, - nfds, - timeout as u64, - ) as i64; + let result = { + #[cfg(target_arch = "x86_64")] + { + libbreenix::raw::syscall3( + libbreenix::syscall::nr::POLL, + fds as u64, + nfds, + timeout as u64, + ) as i64 + } + #[cfg(target_arch = "aarch64")] + { + if timeout < 0 { + libbreenix::raw::syscall5( + libbreenix::syscall::nr::PPOLL, + fds as u64, + nfds, + 0, // NULL timespec = 
infinite + 0, // NULL sigmask + 0, // sigsetsize + ) as i64 + } else { + let ts = libbreenix::types::Timespec { + tv_sec: (timeout / 1000) as i64, + tv_nsec: ((timeout % 1000) as i64) * 1_000_000, + }; + libbreenix::raw::syscall5( + libbreenix::syscall::nr::PPOLL, + fds as u64, + nfds, + &ts as *const libbreenix::types::Timespec as u64, + 0, // NULL sigmask + 0, // sigsetsize + ) as i64 + } + } + }; syscall_result_to_c_int(result) } @@ -1816,14 +2010,14 @@ pub extern "C" fn pause() -> i32 { /// syscall - generic syscall interface #[no_mangle] pub unsafe extern "C" fn syscall(num: i64, a1: i64, a2: i64, a3: i64, a4: i64, a5: i64, a6: i64) -> i64 { - const SYS_FUTEX: i64 = 202; - const SYS_GETRANDOM: i64 = 318; + let sys_futex = libbreenix::syscall::nr::FUTEX as i64; + let sys_getrandom = libbreenix::syscall::nr::GETRANDOM as i64; match num { - SYS_FUTEX => { + n if n == sys_futex => { 0 } - SYS_GETRANDOM => { + n if n == sys_getrandom => { -(ENOSYS as i64) } _ => { diff --git a/libs/libbreenix/src/fs.rs b/libs/libbreenix/src/fs.rs index 835e4247..a1d02b91 100644 --- a/libs/libbreenix/src/fs.rs +++ b/libs/libbreenix/src/fs.rs @@ -56,6 +56,13 @@ impl CPath { } } +/// AT_FDCWD: Use current working directory for *at syscall variants (ARM64 Linux) +#[cfg(target_arch = "aarch64")] +const AT_FDCWD: u64 = (-100i64) as u64; +/// AT_REMOVEDIR flag for unlinkat (behave like rmdir) +#[cfg(target_arch = "aarch64")] +const AT_REMOVEDIR: u64 = 0x200; + /// Open flags (POSIX compatible) pub const O_RDONLY: u32 = 0; pub const O_WRONLY: u32 = 1; @@ -172,12 +179,10 @@ impl Stat { pub fn open(path: &str, flags: u32) -> Result { let cpath = CPath::new(path)?; let ret = unsafe { - raw::syscall3( - nr::OPEN, - cpath.as_u64(), - flags as u64, - 0, // mode (not used for O_RDONLY) - ) as i64 + #[cfg(target_arch = "x86_64")] + { raw::syscall3(nr::OPEN, cpath.as_u64(), flags as u64, 0) as i64 } + #[cfg(target_arch = "aarch64")] + { raw::syscall4(nr::OPENAT, AT_FDCWD, cpath.as_u64(), flags as 
u64, 0) as i64 } }; Error::from_syscall(ret).map(Fd::from_raw) } @@ -195,12 +200,10 @@ pub fn open(path: &str, flags: u32) -> Result { pub fn open_with_mode(path: &str, flags: u32, mode: u32) -> Result { let cpath = CPath::new(path)?; let ret = unsafe { - raw::syscall3( - nr::OPEN, - cpath.as_u64(), - flags as u64, - mode as u64, - ) as i64 + #[cfg(target_arch = "x86_64")] + { raw::syscall3(nr::OPEN, cpath.as_u64(), flags as u64, mode as u64) as i64 } + #[cfg(target_arch = "aarch64")] + { raw::syscall4(nr::OPENAT, AT_FDCWD, cpath.as_u64(), flags as u64, mode as u64) as i64 } }; Error::from_syscall(ret).map(Fd::from_raw) } @@ -226,7 +229,12 @@ pub fn open_with_mode(path: &str, flags: u32, mode: u32) -> Result { #[inline] pub fn access(path: &str, mode: u32) -> Result<(), Error> { let cpath = CPath::new(path)?; - let ret = unsafe { raw::syscall2(nr::ACCESS, cpath.as_u64(), mode as u64) as i64 }; + let ret = unsafe { + #[cfg(target_arch = "x86_64")] + { raw::syscall2(nr::ACCESS, cpath.as_u64(), mode as u64) as i64 } + #[cfg(target_arch = "aarch64")] + { raw::syscall4(nr::FACCESSAT, AT_FDCWD, cpath.as_u64(), mode as u64, 0) as i64 } + }; Error::from_syscall(ret).map(|_| ()) } @@ -457,7 +465,12 @@ pub fn getdents64(fd: Fd, buf: &mut [u8]) -> Result { #[inline] pub fn unlink(path: &str) -> Result<(), Error> { let cpath = CPath::new(path)?; - let ret = unsafe { raw::syscall1(nr::UNLINK, cpath.as_u64()) as i64 }; + let ret = unsafe { + #[cfg(target_arch = "x86_64")] + { raw::syscall1(nr::UNLINK, cpath.as_u64()) as i64 } + #[cfg(target_arch = "aarch64")] + { raw::syscall3(nr::UNLINKAT, AT_FDCWD, cpath.as_u64(), 0) as i64 } + }; Error::from_syscall(ret).map(|_| ()) } @@ -533,7 +546,12 @@ impl<'a> Iterator for DirentIter<'a> { #[inline] pub fn mkdir(path: &str, mode: u32) -> Result<(), Error> { let cpath = CPath::new(path)?; - let ret = unsafe { raw::syscall2(nr::MKDIR, cpath.as_u64(), mode as u64) as i64 }; + let ret = unsafe { + #[cfg(target_arch = "x86_64")] + { 
raw::syscall2(nr::MKDIR, cpath.as_u64(), mode as u64) as i64 } + #[cfg(target_arch = "aarch64")] + { raw::syscall3(nr::MKDIRAT, AT_FDCWD, cpath.as_u64(), mode as u64) as i64 } + }; Error::from_syscall(ret).map(|_| ()) } @@ -562,7 +580,12 @@ pub fn mkdir(path: &str, mode: u32) -> Result<(), Error> { #[inline] pub fn rmdir(path: &str) -> Result<(), Error> { let cpath = CPath::new(path)?; - let ret = unsafe { raw::syscall1(nr::RMDIR, cpath.as_u64()) as i64 }; + let ret = unsafe { + #[cfg(target_arch = "x86_64")] + { raw::syscall1(nr::RMDIR, cpath.as_u64()) as i64 } + #[cfg(target_arch = "aarch64")] + { raw::syscall3(nr::UNLINKAT, AT_FDCWD, cpath.as_u64(), AT_REMOVEDIR) as i64 } + }; Error::from_syscall(ret).map(|_| ()) } @@ -596,7 +619,10 @@ pub fn rename(oldpath: &str, newpath: &str) -> Result<(), Error> { let cold = CPath::new(oldpath)?; let cnew = CPath::new(newpath)?; let ret = unsafe { - raw::syscall2(nr::RENAME, cold.as_u64(), cnew.as_u64()) as i64 + #[cfg(target_arch = "x86_64")] + { raw::syscall2(nr::RENAME, cold.as_u64(), cnew.as_u64()) as i64 } + #[cfg(target_arch = "aarch64")] + { raw::syscall4(nr::RENAMEAT, AT_FDCWD, cold.as_u64(), AT_FDCWD, cnew.as_u64()) as i64 } }; Error::from_syscall(ret).map(|_| ()) } @@ -634,7 +660,10 @@ pub fn link(oldpath: &str, newpath: &str) -> Result<(), Error> { let cold = CPath::new(oldpath)?; let cnew = CPath::new(newpath)?; let ret = unsafe { - raw::syscall2(nr::LINK, cold.as_u64(), cnew.as_u64()) as i64 + #[cfg(target_arch = "x86_64")] + { raw::syscall2(nr::LINK, cold.as_u64(), cnew.as_u64()) as i64 } + #[cfg(target_arch = "aarch64")] + { raw::syscall5(nr::LINKAT, AT_FDCWD, cold.as_u64(), AT_FDCWD, cnew.as_u64(), 0) as i64 } }; Error::from_syscall(ret).map(|_| ()) } @@ -673,7 +702,10 @@ pub fn symlink(target: &str, linkpath: &str) -> Result<(), Error> { let ctarget = CPath::new(target)?; let clink = CPath::new(linkpath)?; let ret = unsafe { - raw::syscall2(nr::SYMLINK, ctarget.as_u64(), clink.as_u64()) as i64 + 
#[cfg(target_arch = "x86_64")] + { raw::syscall2(nr::SYMLINK, ctarget.as_u64(), clink.as_u64()) as i64 } + #[cfg(target_arch = "aarch64")] + { raw::syscall3(nr::SYMLINKAT, ctarget.as_u64(), AT_FDCWD, clink.as_u64()) as i64 } }; Error::from_syscall(ret).map(|_| ()) } @@ -711,12 +743,10 @@ pub fn symlink(target: &str, linkpath: &str) -> Result<(), Error> { pub fn readlink(pathname: &str, buf: &mut [u8]) -> Result { let cpath = CPath::new(pathname)?; let ret = unsafe { - raw::syscall3( - nr::READLINK, - cpath.as_u64(), - buf.as_mut_ptr() as u64, - buf.len() as u64, - ) as i64 + #[cfg(target_arch = "x86_64")] + { raw::syscall3(nr::READLINK, cpath.as_u64(), buf.as_mut_ptr() as u64, buf.len() as u64) as i64 } + #[cfg(target_arch = "aarch64")] + { raw::syscall4(nr::READLINKAT, AT_FDCWD, cpath.as_u64(), buf.as_mut_ptr() as u64, buf.len() as u64) as i64 } }; Error::from_syscall(ret).map(|n| n as usize) } @@ -752,12 +782,10 @@ pub fn mkfifo(pathname: &str, mode: u32) -> Result<(), Error> { let cpath = CPath::new(pathname)?; // mkfifo is implemented via mknod with S_IFIFO mode let ret = unsafe { - raw::syscall3( - nr::MKNOD, - cpath.as_u64(), - (S_IFIFO | (mode & 0o777)) as u64, - 0, // dev number (unused for FIFOs) - ) as i64 + #[cfg(target_arch = "x86_64")] + { raw::syscall3(nr::MKNOD, cpath.as_u64(), (S_IFIFO | (mode & 0o777)) as u64, 0) as i64 } + #[cfg(target_arch = "aarch64")] + { raw::syscall4(nr::MKNODAT, AT_FDCWD, cpath.as_u64(), (S_IFIFO | (mode & 0o777)) as u64, 0) as i64 } }; Error::from_syscall(ret).map(|_| ()) } diff --git a/libs/libbreenix/src/io.rs b/libs/libbreenix/src/io.rs index 026de190..accbb311 100644 --- a/libs/libbreenix/src/io.rs +++ b/libs/libbreenix/src/io.rs @@ -119,7 +119,12 @@ pub fn close(fd: Fd) -> Result<(), Error> { #[inline] pub fn pipe() -> Result<(Fd, Fd), Error> { let mut pipefd = [0i32; 2]; - let ret = unsafe { raw::syscall1(nr::PIPE, pipefd.as_mut_ptr() as u64) }; + let ret = unsafe { + #[cfg(target_arch = "x86_64")] + { 
raw::syscall1(nr::PIPE, pipefd.as_mut_ptr() as u64) } + #[cfg(target_arch = "aarch64")] + { raw::syscall2(nr::PIPE2, pipefd.as_mut_ptr() as u64, 0) } + }; Error::from_syscall(ret as i64).map(|_| { (Fd::from_raw(pipefd[0] as u64), Fd::from_raw(pipefd[1] as u64)) }) @@ -177,7 +182,12 @@ pub fn dup(old_fd: Fd) -> Result { /// `new_fd` on success, `Err(Error)` on error. #[inline] pub fn dup2(old_fd: Fd, new_fd: Fd) -> Result { - let ret = unsafe { raw::syscall2(nr::DUP2, old_fd.raw(), new_fd.raw()) }; + let ret = unsafe { + #[cfg(target_arch = "x86_64")] + { raw::syscall2(nr::DUP2, old_fd.raw(), new_fd.raw()) } + #[cfg(target_arch = "aarch64")] + { raw::syscall3(nr::DUP3, old_fd.raw(), new_fd.raw(), 0) } + }; Error::from_syscall(ret as i64).map(|v| Fd::from_raw(v)) } @@ -322,12 +332,44 @@ impl PollFd { #[inline] pub fn poll(fds: &mut [PollFd], timeout: i32) -> Result { let ret = unsafe { - raw::syscall3( - nr::POLL, - fds.as_mut_ptr() as u64, - fds.len() as u64, - timeout as u64, - ) + #[cfg(target_arch = "x86_64")] + { + raw::syscall3( + nr::POLL, + fds.as_mut_ptr() as u64, + fds.len() as u64, + timeout as u64, + ) + } + // ARM64 Linux has no poll; use ppoll(fds, nfds, timeout_ts, NULL, 0) + #[cfg(target_arch = "aarch64")] + { + if timeout < 0 { + // Infinite timeout: pass NULL timespec + raw::syscall5( + nr::PPOLL, + fds.as_mut_ptr() as u64, + fds.len() as u64, + 0, // NULL timespec = infinite + 0, // NULL sigmask + 0, // sigsetsize + ) + } else { + // Convert milliseconds to timespec + let ts = crate::types::Timespec { + tv_sec: (timeout / 1000) as i64, + tv_nsec: ((timeout % 1000) as i64) * 1_000_000, + }; + raw::syscall5( + nr::PPOLL, + fds.as_mut_ptr() as u64, + fds.len() as u64, + &ts as *const crate::types::Timespec as u64, + 0, // NULL sigmask + 0, // sigsetsize + ) + } + } }; Error::from_syscall(ret as i64).map(|v| v as usize) } @@ -402,14 +444,30 @@ pub fn select( let exceptfds_ptr = exceptfds.map(|p| p as *mut FdSet as u64).unwrap_or(0); let ret = unsafe { 
- raw::syscall5( - nr::SELECT, - nfds as u64, - readfds_ptr, - writefds_ptr, - exceptfds_ptr, - timeout_ptr, - ) + #[cfg(target_arch = "x86_64")] + { + raw::syscall5( + nr::SELECT, + nfds as u64, + readfds_ptr, + writefds_ptr, + exceptfds_ptr, + timeout_ptr, + ) + } + // ARM64 Linux has no select; use pselect6(nfds, readfds, writefds, exceptfds, timeout, NULL) + #[cfg(target_arch = "aarch64")] + { + raw::syscall6( + nr::PSELECT6, + nfds as u64, + readfds_ptr, + writefds_ptr, + exceptfds_ptr, + timeout_ptr, + 0, // NULL sigmask + ) + } }; Error::from_syscall(ret as i64).map(|v| v as usize) } diff --git a/libs/libbreenix/src/process.rs b/libs/libbreenix/src/process.rs index 937b7e7e..1ad4a05a 100644 --- a/libs/libbreenix/src/process.rs +++ b/libs/libbreenix/src/process.rs @@ -37,7 +37,13 @@ pub fn exit(code: i32) -> ! { /// - `Err(Error)` on failure #[inline] pub fn fork() -> Result { - let ret = unsafe { raw::syscall0(nr::FORK) }; + let ret = unsafe { + #[cfg(target_arch = "x86_64")] + { raw::syscall0(nr::FORK) } + // ARM64 Linux has no fork syscall; use clone(SIGCHLD, 0, 0, 0, 0) + #[cfg(target_arch = "aarch64")] + { raw::syscall5(nr::CLONE, 17, 0, 0, 0, 0) } // 17 = SIGCHLD + }; let val = Error::from_syscall(ret as i64)?; if val == 0 { Ok(ForkResult::Child) diff --git a/libs/libbreenix/src/pty.rs b/libs/libbreenix/src/pty.rs index ae213b4e..18cf2d46 100644 --- a/libs/libbreenix/src/pty.rs +++ b/libs/libbreenix/src/pty.rs @@ -12,12 +12,7 @@ pub const O_RDWR: i32 = 0x02; pub const O_NOCTTY: i32 = 0x100; pub const O_CLOEXEC: i32 = 0x80000; -// PTY syscall numbers (will be assigned in kernel) -// For now, use high numbers that won't conflict -pub const SYS_POSIX_OPENPT: u64 = 400; -pub const SYS_GRANTPT: u64 = 401; -pub const SYS_UNLOCKPT: u64 = 402; -pub const SYS_PTSNAME: u64 = 403; +use crate::syscall::nr; /// Open a new PTY master device /// @@ -28,7 +23,7 @@ pub const SYS_PTSNAME: u64 = 403; /// * `Ok(Fd)` - File descriptor for PTY master /// * `Err(Error)` - 
Error pub fn posix_openpt(flags: i32) -> Result { - let result = unsafe { raw::syscall1(SYS_POSIX_OPENPT, flags as u64) }; + let result = unsafe { raw::syscall1(nr::POSIX_OPENPT, flags as u64) }; Error::from_syscall(result as i64).map(Fd::from_raw) } @@ -44,7 +39,7 @@ pub fn posix_openpt(flags: i32) -> Result { /// * `Ok(())` - Success /// * `Err(Error)` - Error (ENOTTY if not a PTY master) pub fn grantpt(fd: Fd) -> Result<(), Error> { - let result = unsafe { raw::syscall1(SYS_GRANTPT, fd.raw()) }; + let result = unsafe { raw::syscall1(nr::GRANTPT, fd.raw()) }; Error::from_syscall(result as i64).map(|_| ()) } @@ -59,7 +54,7 @@ pub fn grantpt(fd: Fd) -> Result<(), Error> { /// * `Ok(())` - Success /// * `Err(Error)` - Error (ENOTTY if not a PTY master) pub fn unlockpt(fd: Fd) -> Result<(), Error> { - let result = unsafe { raw::syscall1(SYS_UNLOCKPT, fd.raw()) }; + let result = unsafe { raw::syscall1(nr::UNLOCKPT, fd.raw()) }; Error::from_syscall(result as i64).map(|_| ()) } @@ -74,7 +69,7 @@ pub fn unlockpt(fd: Fd) -> Result<(), Error> { /// * `Err(Error)` - Error (ENOTTY if not a PTY master, ERANGE if buffer too small) pub fn ptsname(fd: Fd, buf: &mut [u8]) -> Result { let result = unsafe { - raw::syscall3(SYS_PTSNAME, fd.raw(), buf.as_mut_ptr() as u64, buf.len() as u64) + raw::syscall3(nr::PTSNAME, fd.raw(), buf.as_mut_ptr() as u64, buf.len() as u64) }; Error::from_syscall(result as i64).map(|_| { // Find the actual length (up to null terminator) diff --git a/libs/libbreenix/src/signal.rs b/libs/libbreenix/src/signal.rs index b7f51f35..0dcaf354 100644 --- a/libs/libbreenix/src/signal.rs +++ b/libs/libbreenix/src/signal.rs @@ -7,17 +7,7 @@ use crate::error::Error; use crate::syscall::raw; -// Syscall numbers (must match kernel/src/syscall/mod.rs) -pub const SYS_SIGACTION: u64 = 13; -pub const SYS_SIGPROCMASK: u64 = 14; -pub const SYS_SIGRETURN: u64 = 15; -pub const SYS_GETITIMER: u64 = 36; -pub const SYS_ALARM: u64 = 37; -pub const SYS_SETITIMER: u64 = 38; -pub 
const SYS_KILL: u64 = 62; -pub const SYS_SIGPENDING: u64 = 127; -pub const SYS_SIGSUSPEND: u64 = 130; -pub const SYS_SIGALTSTACK: u64 = 131; +use crate::syscall::nr; // Signal numbers (must match kernel/src/signal/constants.rs) pub const SIGHUP: i32 = 1; @@ -133,7 +123,7 @@ pub extern "C" fn __restore_rt() -> ! { #[unsafe(naked)] pub extern "C" fn __restore_rt() -> ! { core::arch::naked_asm!( - "mov x8, 15", // SYS_rt_sigreturn + "mov x8, 139", // SYS_rt_sigreturn (Linux ARM64) "svc #0", // Trigger syscall "brk #1", // Should never reach here ) @@ -249,7 +239,7 @@ impl Default for StackT { /// } /// ``` pub fn kill(pid: i32, sig: i32) -> Result<(), Error> { - let ret = unsafe { raw::syscall2(SYS_KILL, pid as u64, sig as u64) }; + let ret = unsafe { raw::syscall2(nr::KILL, pid as u64, sig as u64) }; Error::from_syscall(ret as i64).map(|_| ()) } @@ -282,7 +272,7 @@ pub fn sigaction( let oldact_ptr = oldact.map_or(0, |a| a as *mut _ as u64); let ret = unsafe { - raw::syscall4(SYS_SIGACTION, sig as u64, act_ptr, oldact_ptr, 8) + raw::syscall4(nr::SIGACTION, sig as u64, act_ptr, oldact_ptr, 8) }; Error::from_syscall(ret as i64).map(|_| ()) @@ -314,7 +304,7 @@ pub fn sigprocmask(how: i32, set: Option<&u64>, oldset: Option<&mut u64>) -> Res let oldset_ptr = oldset.map_or(0, |s| s as *mut _ as u64); let ret = unsafe { - raw::syscall4(SYS_SIGPROCMASK, how as u64, set_ptr, oldset_ptr, 8) + raw::syscall4(nr::SIGPROCMASK, how as u64, set_ptr, oldset_ptr, 8) }; Error::from_syscall(ret as i64).map(|_| ()) @@ -330,7 +320,7 @@ pub fn sigprocmask(how: i32, set: Option<&u64>, oldset: Option<&mut u64>) -> Res /// This function never returns normally. It restores execution to /// the point where the signal was delivered. pub unsafe fn sigreturn() -> ! { - raw::syscall0(SYS_SIGRETURN); + raw::syscall0(nr::SIGRETURN); // Should never reach here, but if it does, loop forever loop { core::hint::spin_loop(); @@ -358,7 +348,17 @@ pub unsafe fn sigreturn() -> ! 
{ /// let _ = pause(); // Will return when SIGUSR1 is received /// ``` pub fn pause() -> Result<(), Error> { - let ret = unsafe { raw::syscall0(crate::syscall::nr::PAUSE) }; + let ret = unsafe { + #[cfg(target_arch = "x86_64")] + { raw::syscall0(crate::syscall::nr::PAUSE) } + // ARM64 Linux has no pause syscall; use sigsuspend with empty mask + // (unblocks all signals, waits for any signal delivery) + #[cfg(target_arch = "aarch64")] + { + let mask: u64 = 0; + raw::syscall2(crate::syscall::nr::SIGSUSPEND, &mask as *const u64 as u64, 8) + } + }; // pause always returns -EINTR when a signal is caught Error::from_syscall(ret as i64).map(|_| ()) } @@ -424,7 +424,7 @@ pub fn signame(sig: i32) -> &'static str { /// * `Err(Error)` on failure pub fn sigpending(set: &mut u64) -> Result<(), Error> { let ret = unsafe { - raw::syscall2(SYS_SIGPENDING, set as *mut u64 as u64, 8) + raw::syscall2(nr::SIGPENDING, set as *mut u64 as u64, 8) }; Error::from_syscall(ret as i64).map(|_| ()) } @@ -442,7 +442,7 @@ pub fn sigpending(set: &mut u64) -> Result<(), Error> { /// * Always returns `Err(Error)` with EINTR (interrupted by signal) pub fn sigsuspend(mask: &u64) -> Result<(), Error> { let ret = unsafe { - raw::syscall2(SYS_SIGSUSPEND, mask as *const u64 as u64, 8) + raw::syscall2(nr::SIGSUSPEND, mask as *const u64 as u64, 8) }; // sigsuspend always returns -EINTR when a signal is caught Error::from_syscall(ret as i64).map(|_| ()) @@ -462,7 +462,7 @@ pub fn sigaltstack(ss: Option<&StackT>, old_ss: Option<&mut StackT>) -> Result<( let old_ss_ptr = old_ss.map_or(0, |s| s as *mut _ as u64); let ret = unsafe { - raw::syscall2(SYS_SIGALTSTACK, ss_ptr, old_ss_ptr) + raw::syscall2(nr::SIGALTSTACK, ss_ptr, old_ss_ptr) }; Error::from_syscall(ret as i64).map(|_| ()) @@ -479,8 +479,23 @@ pub fn sigaltstack(ss: Option<&StackT>, old_ss: Option<&mut StackT>) -> Result<( /// # Returns /// * The number of seconds remaining from a previous alarm (0 if none) pub fn alarm(seconds: u32) -> u32 { - unsafe 
{ - raw::syscall1(SYS_ALARM, seconds as u64) as u32 + #[cfg(target_arch = "x86_64")] + { + unsafe { raw::syscall1(nr::ALARM, seconds as u64) as u32 } + } + // ARM64 Linux has no alarm syscall; implement via setitimer(ITIMER_REAL) + #[cfg(target_arch = "aarch64")] + { + let new_val = Itimerval { + it_interval: Timeval { tv_sec: 0, tv_usec: 0 }, + it_value: Timeval { tv_sec: seconds as i64, tv_usec: 0 }, + }; + let mut old_val = Itimerval { + it_interval: Timeval { tv_sec: 0, tv_usec: 0 }, + it_value: Timeval { tv_sec: 0, tv_usec: 0 }, + }; + let _ = setitimer(ITIMER_REAL, &new_val, Some(&mut old_val)); + old_val.it_value.tv_sec as u32 } } @@ -495,7 +510,7 @@ pub fn alarm(seconds: u32) -> u32 { /// * `Err(Error)` on failure pub fn getitimer(which: i32, curr_value: &mut Itimerval) -> Result<(), Error> { let ret = unsafe { - raw::syscall2(SYS_GETITIMER, which as u64, curr_value as *mut _ as u64) + raw::syscall2(nr::GETITIMER, which as u64, curr_value as *mut _ as u64) }; Error::from_syscall(ret as i64).map(|_| ()) @@ -535,7 +550,7 @@ pub fn setitimer(which: i32, new_value: &Itimerval, old_value: Option<&mut Itime let old_ptr = old_value.map_or(0, |v| v as *mut _ as u64); let ret = unsafe { - raw::syscall3(SYS_SETITIMER, which as u64, new_value as *const _ as u64, old_ptr) + raw::syscall3(nr::SETITIMER, which as u64, new_value as *const _ as u64, old_ptr) }; Error::from_syscall(ret as i64).map(|_| ()) diff --git a/libs/libbreenix/src/syscall.rs b/libs/libbreenix/src/syscall.rs index 60978738..981521d4 100644 --- a/libs/libbreenix/src/syscall.rs +++ b/libs/libbreenix/src/syscall.rs @@ -15,38 +15,42 @@ use core::arch::asm; /// Syscall numbers matching kernel/src/syscall/mod.rs +/// +/// x86_64: Uses Linux x86_64 ABI numbers for musl libc compatibility. +/// ARM64: Uses Linux ARM64 (asm-generic/unistd.h) numbers for musl libc compatibility. 
+#[cfg(target_arch = "x86_64")] pub mod nr { - pub const EXIT: u64 = 0; + // Linux x86_64 ABI numbers + pub const READ: u64 = 0; pub const WRITE: u64 = 1; - pub const READ: u64 = 2; - pub const YIELD: u64 = 3; - pub const GET_TIME: u64 = 4; - pub const FORK: u64 = 5; - pub const CLOSE: u64 = 6; // Custom number (not Linux standard) - pub const POLL: u64 = 7; // Linux x86_64 poll - pub const MMAP: u64 = 9; // Linux x86_64 mmap - pub const MPROTECT: u64 = 10; // Linux x86_64 mprotect - pub const MUNMAP: u64 = 11; // Linux x86_64 munmap + pub const CLOSE: u64 = 3; + pub const FSTAT: u64 = 5; + pub const POLL: u64 = 7; + pub const LSEEK: u64 = 8; + pub const MMAP: u64 = 9; + pub const MPROTECT: u64 = 10; + pub const MUNMAP: u64 = 11; pub const BRK: u64 = 12; - pub const SIGACTION: u64 = 13; // Linux x86_64 rt_sigaction - pub const SIGPROCMASK: u64 = 14; // Linux x86_64 rt_sigprocmask - pub const SIGRETURN: u64 = 15; // Linux x86_64 rt_sigreturn - pub const IOCTL: u64 = 16; // Linux x86_64 ioctl - pub const ACCESS: u64 = 21; // Linux x86_64 access - pub const PIPE: u64 = 22; // Linux x86_64 pipe - pub const GETCWD: u64 = 79; // Linux x86_64 getcwd - pub const CHDIR: u64 = 80; // Linux x86_64 chdir - pub const SELECT: u64 = 23; // Linux x86_64 select - pub const PIPE2: u64 = 293; // Linux x86_64 pipe2 - pub const DUP: u64 = 32; // Linux x86_64 dup - pub const DUP2: u64 = 33; // Linux x86_64 dup2 - pub const PAUSE: u64 = 34; // Linux x86_64 pause - pub const NANOSLEEP: u64 = 35; // Linux x86_64 nanosleep - pub const GETITIMER: u64 = 36; // Linux x86_64 getitimer - pub const ALARM: u64 = 37; // Linux x86_64 alarm - pub const SETITIMER: u64 = 38; // Linux x86_64 setitimer + pub const SIGACTION: u64 = 13; + pub const SIGPROCMASK: u64 = 14; + pub const SIGRETURN: u64 = 15; + pub const IOCTL: u64 = 16; + pub const READV: u64 = 19; + pub const WRITEV: u64 = 20; + pub const ACCESS: u64 = 21; + pub const PIPE: u64 = 22; + pub const SELECT: u64 = 23; + pub const YIELD: u64 = 24; + 
pub const MREMAP: u64 = 25; + pub const MADVISE: u64 = 28; + pub const DUP: u64 = 32; + pub const DUP2: u64 = 33; + pub const PAUSE: u64 = 34; + pub const NANOSLEEP: u64 = 35; + pub const GETITIMER: u64 = 36; + pub const ALARM: u64 = 37; + pub const SETITIMER: u64 = 38; pub const GETPID: u64 = 39; - pub const FCNTL: u64 = 72; // Linux x86_64 fcntl pub const SOCKET: u64 = 41; pub const CONNECT: u64 = 42; pub const ACCEPT: u64 = 43; @@ -55,55 +59,213 @@ pub mod nr { pub const SHUTDOWN: u64 = 48; pub const BIND: u64 = 49; pub const LISTEN: u64 = 50; - pub const GETSOCKNAME: u64 = 51; // Linux x86_64 getsockname - pub const GETPEERNAME: u64 = 52; // Linux x86_64 getpeername - pub const SOCKETPAIR: u64 = 53; // Linux x86_64 socketpair - pub const SETSOCKOPT: u64 = 54; // Linux x86_64 setsockopt - pub const GETSOCKOPT: u64 = 55; // Linux x86_64 getsockopt - pub const EXEC: u64 = 59; // Linux x86_64 execve - pub const WAIT4: u64 = 61; // Linux x86_64 wait4/waitpid - pub const KILL: u64 = 62; // Linux x86_64 kill - pub const SETPGID: u64 = 109; // Linux x86_64 setpgid - pub const GETPPID: u64 = 110; // Linux x86_64 getppid - pub const SETSID: u64 = 112; // Linux x86_64 setsid - pub const GETPGID: u64 = 121; // Linux x86_64 getpgid - pub const GETSID: u64 = 124; // Linux x86_64 getsid - pub const SIGPENDING: u64 = 127; // Linux x86_64 rt_sigpending - pub const SIGSUSPEND: u64 = 130; // Linux x86_64 rt_sigsuspend - pub const SIGALTSTACK: u64 = 131; // Linux x86_64 sigaltstack - pub const RENAME: u64 = 82; // Linux x86_64 rename - pub const MKDIR: u64 = 83; // Linux x86_64 mkdir - pub const RMDIR: u64 = 84; // Linux x86_64 rmdir - pub const LINK: u64 = 86; // Linux x86_64 link (hard links) - pub const UNLINK: u64 = 87; // Linux x86_64 unlink - pub const SYMLINK: u64 = 88; // Linux x86_64 symlink - pub const READLINK: u64 = 89; // Linux x86_64 readlink - pub const MKNOD: u64 = 133; // Linux x86_64 mknod (used for mkfifo) + pub const GETSOCKNAME: u64 = 51; + pub const 
GETPEERNAME: u64 = 52; + pub const SOCKETPAIR: u64 = 53; + pub const SETSOCKOPT: u64 = 54; + pub const GETSOCKOPT: u64 = 55; + pub const CLONE: u64 = 56; + pub const FORK: u64 = 57; + pub const EXEC: u64 = 59; + pub const EXIT: u64 = 60; + pub const WAIT4: u64 = 61; + pub const KILL: u64 = 62; + pub const FCNTL: u64 = 72; + pub const GETCWD: u64 = 79; + pub const CHDIR: u64 = 80; + pub const RENAME: u64 = 82; + pub const MKDIR: u64 = 83; + pub const RMDIR: u64 = 84; + pub const LINK: u64 = 86; + pub const UNLINK: u64 = 87; + pub const SYMLINK: u64 = 88; + pub const READLINK: u64 = 89; + pub const SETPGID: u64 = 109; + pub const GETPPID: u64 = 110; + pub const SETSID: u64 = 112; + pub const GETPGID: u64 = 121; + pub const GETSID: u64 = 124; + pub const SIGPENDING: u64 = 127; + pub const SIGSUSPEND: u64 = 130; + pub const SIGALTSTACK: u64 = 131; + pub const MKNOD: u64 = 133; + pub const ARCH_PRCTL: u64 = 158; pub const GETTID: u64 = 186; - pub const SET_TID_ADDRESS: u64 = 218; // Linux x86_64 set_tid_address + pub const FUTEX: u64 = 202; + pub const GETDENTS64: u64 = 217; + pub const SET_TID_ADDRESS: u64 = 218; pub const CLOCK_GETTIME: u64 = 228; - pub const EXIT_GROUP: u64 = 231; // Linux x86_64 exit_group - pub const OPEN: u64 = 257; // Breenix: filesystem open syscall - pub const LSEEK: u64 = 258; // Breenix: filesystem lseek syscall - pub const FSTAT: u64 = 259; // Breenix: filesystem fstat syscall - pub const GETDENTS64: u64 = 260; // Breenix: directory listing syscall + pub const EXIT_GROUP: u64 = 231; + pub const OPEN: u64 = 2; // Linux x86_64 open + pub const NEWFSTATAT: u64 = 262; + pub const OPENAT: u64 = 257; + pub const MKDIRAT: u64 = 258; + pub const MKNODAT: u64 = 259; + pub const UNLINKAT: u64 = 263; + pub const RENAMEAT: u64 = 264; + pub const LINKAT: u64 = 265; + pub const SYMLINKAT: u64 = 266; + pub const READLINKAT: u64 = 267; + pub const FACCESSAT: u64 = 269; + pub const PSELECT6: u64 = 270; + pub const PPOLL: u64 = 271; + pub const 
SET_ROBUST_LIST: u64 = 273; + pub const DUP3: u64 = 292; + pub const PIPE2: u64 = 293; + pub const GETRANDOM: u64 = 318; + // PTY syscalls (Breenix-specific, same on both architectures) + pub const POSIX_OPENPT: u64 = 400; + pub const GRANTPT: u64 = 401; + pub const UNLOCKPT: u64 = 402; + pub const PTSNAME: u64 = 403; + // Graphics syscalls (Breenix-specific) + pub const FBINFO: u64 = 410; + pub const FBDRAW: u64 = 411; + pub const FBMMAP: u64 = 412; + pub const GET_MOUSE_POS: u64 = 413; + // Audio syscalls (Breenix-specific) + pub const AUDIO_INIT: u64 = 420; + pub const AUDIO_WRITE: u64 = 421; + // Display takeover (Breenix-specific) + pub const TAKE_OVER_DISPLAY: u64 = 431; + pub const GIVE_BACK_DISPLAY: u64 = 432; + // Testing syscalls (Breenix-specific) + pub const COW_STATS: u64 = 500; + pub const SIMULATE_OOM: u64 = 501; +} + +#[cfg(target_arch = "aarch64")] +pub mod nr { + // Linux ARM64 ABI numbers (asm-generic/unistd.h) + // ARM64 Linux has NO legacy syscalls: use *at variants instead of + // open/mkdir/rmdir/link/unlink/symlink/readlink/mknod/rename/access. + // Use dup3 instead of dup2, pipe2 instead of pipe, clone instead of fork. 
+ + // I/O + pub const GETCWD: u64 = 17; + pub const DUP: u64 = 23; + pub const DUP3: u64 = 24; + pub const FCNTL: u64 = 25; + pub const IOCTL: u64 = 29; + + // Filesystem *at variants + pub const MKNODAT: u64 = 33; + pub const MKDIRAT: u64 = 34; + pub const UNLINKAT: u64 = 35; + pub const SYMLINKAT: u64 = 36; + pub const LINKAT: u64 = 37; + pub const RENAMEAT: u64 = 38; + pub const FACCESSAT: u64 = 48; + pub const CHDIR: u64 = 49; + pub const OPENAT: u64 = 56; + pub const CLOSE: u64 = 57; + pub const PIPE2: u64 = 59; + pub const GETDENTS64: u64 = 61; + pub const LSEEK: u64 = 62; + pub const READ: u64 = 63; + pub const WRITE: u64 = 64; + pub const READV: u64 = 65; + pub const WRITEV: u64 = 66; + + // I/O multiplexing + pub const PSELECT6: u64 = 72; + pub const PPOLL: u64 = 73; + pub const READLINKAT: u64 = 78; + pub const NEWFSTATAT: u64 = 79; + pub const FSTAT: u64 = 80; + + // Process management + pub const EXIT: u64 = 93; + pub const EXIT_GROUP: u64 = 94; + pub const SET_TID_ADDRESS: u64 = 96; + pub const FUTEX: u64 = 98; + pub const SET_ROBUST_LIST: u64 = 99; + + // Timers + pub const NANOSLEEP: u64 = 101; + pub const GETITIMER: u64 = 102; + pub const SETITIMER: u64 = 103; + pub const CLOCK_GETTIME: u64 = 113; + + // Scheduling + pub const YIELD: u64 = 124; + + // Signals + pub const KILL: u64 = 129; + pub const SIGALTSTACK: u64 = 132; + pub const SIGSUSPEND: u64 = 133; + pub const SIGACTION: u64 = 134; + pub const SIGPROCMASK: u64 = 135; + pub const SIGPENDING: u64 = 136; + pub const SIGRETURN: u64 = 139; + + // Session/process group + pub const SETPGID: u64 = 154; + pub const GETPGID: u64 = 155; + pub const GETSID: u64 = 156; + pub const SETSID: u64 = 157; + + // Process info + pub const GETPID: u64 = 172; + pub const GETPPID: u64 = 173; + pub const GETTID: u64 = 178; + + // Socket + pub const SOCKET: u64 = 198; + pub const SOCKETPAIR: u64 = 199; + pub const BIND: u64 = 200; + pub const LISTEN: u64 = 201; + pub const ACCEPT: u64 = 202; + pub const CONNECT: 
u64 = 203; + pub const GETSOCKNAME: u64 = 204; + pub const GETPEERNAME: u64 = 205; + pub const SENDTO: u64 = 206; + pub const RECVFROM: u64 = 207; + pub const SETSOCKOPT: u64 = 208; + pub const GETSOCKOPT: u64 = 209; + pub const SHUTDOWN: u64 = 210; + + // Memory + pub const BRK: u64 = 214; + pub const MUNMAP: u64 = 215; + pub const MREMAP: u64 = 216; + pub const CLONE: u64 = 220; + pub const EXEC: u64 = 221; + pub const MMAP: u64 = 222; + pub const MPROTECT: u64 = 226; + pub const MADVISE: u64 = 233; + + // Wait + pub const WAIT4: u64 = 260; + + // Random + pub const GETRANDOM: u64 = 278; + + // NOTE: ARM64 Linux has NO legacy syscalls: open, dup2, pipe, fork, + // access, rename, mkdir, rmdir, link, unlink, symlink, readlink, + // mknod, select, poll, alarm, pause. Callers must use the *at variants + // (openat, mkdirat, etc.) or modern replacements (dup3, pipe2, clone) + // with the correct argument counts. See libbreenix/src/fs.rs for examples. + + // PTY syscalls (Breenix-specific, same on both architectures) + pub const POSIX_OPENPT: u64 = 400; + pub const GRANTPT: u64 = 401; + pub const UNLOCKPT: u64 = 402; + pub const PTSNAME: u64 = 403; // Graphics syscalls (Breenix-specific) - pub const FBINFO: u64 = 410; // Breenix: get framebuffer info - pub const FBDRAW: u64 = 411; // Breenix: draw to framebuffer - pub const FBMMAP: u64 = 412; // Breenix: mmap framebuffer into userspace - pub const GET_MOUSE_POS: u64 = 413; // Breenix: get mouse cursor position + pub const FBINFO: u64 = 410; + pub const FBDRAW: u64 = 411; + pub const FBMMAP: u64 = 412; + pub const GET_MOUSE_POS: u64 = 413; // Audio syscalls (Breenix-specific) - pub const AUDIO_INIT: u64 = 420; // Breenix: initialize audio stream - pub const AUDIO_WRITE: u64 = 421; // Breenix: write PCM data to audio device - pub const CLONE: u64 = 56; // Linux x86_64 clone - pub const FUTEX: u64 = 202; // Linux x86_64 futex - pub const GETRANDOM: u64 = 318; // Linux x86_64 getrandom + pub const AUDIO_INIT: u64 = 420; + 
pub const AUDIO_WRITE: u64 = 421; // Display takeover (Breenix-specific) - pub const TAKE_OVER_DISPLAY: u64 = 431; // Breenix: userspace takes over display - pub const GIVE_BACK_DISPLAY: u64 = 432; // Breenix: userspace gives display back + pub const TAKE_OVER_DISPLAY: u64 = 431; + pub const GIVE_BACK_DISPLAY: u64 = 432; // Testing syscalls (Breenix-specific) - pub const COW_STATS: u64 = 500; // Breenix: get CoW statistics (for testing) - pub const SIMULATE_OOM: u64 = 501; // Breenix: enable/disable OOM simulation (for testing) + pub const COW_STATS: u64 = 500; + pub const SIMULATE_OOM: u64 = 501; } /// Raw syscall functions - use higher-level wrappers when possible diff --git a/libs/libbreenix/src/termios.rs b/libs/libbreenix/src/termios.rs index 45ed85a7..df4afaf0 100644 --- a/libs/libbreenix/src/termios.rs +++ b/libs/libbreenix/src/termios.rs @@ -8,8 +8,7 @@ use crate::error::Error; use crate::syscall::raw; use crate::types::Fd; -/// Syscall number for ioctl -pub const SYS_IOCTL: u64 = 16; +use crate::syscall::nr; /// ioctl request codes pub mod request { @@ -106,7 +105,7 @@ impl Default for Termios { /// Get terminal attributes pub fn tcgetattr(fd: Fd, termios: &mut Termios) -> Result<(), Error> { let ret = unsafe { - raw::syscall3(SYS_IOCTL, fd.raw(), request::TCGETS, termios as *mut _ as u64) + raw::syscall3(nr::IOCTL, fd.raw(), request::TCGETS, termios as *mut _ as u64) }; Error::from_syscall(ret as i64).map(|_| ()) @@ -122,7 +121,7 @@ pub fn tcsetattr(fd: Fd, action: i32, termios: &Termios) -> Result<(), Error> { }; let ret = unsafe { - raw::syscall3(SYS_IOCTL, fd.raw(), request, termios as *const _ as u64) + raw::syscall3(nr::IOCTL, fd.raw(), request, termios as *const _ as u64) }; Error::from_syscall(ret as i64).map(|_| ()) @@ -131,7 +130,7 @@ pub fn tcsetattr(fd: Fd, action: i32, termios: &Termios) -> Result<(), Error> { /// Set terminal window size (TIOCSWINSZ) pub fn set_winsize(fd: Fd, ws: &Winsize) -> Result<(), Error> { let ret = unsafe { - 
raw::syscall3(SYS_IOCTL, fd.raw(), request::TIOCSWINSZ, ws as *const _ as u64) + raw::syscall3(nr::IOCTL, fd.raw(), request::TIOCSWINSZ, ws as *const _ as u64) }; Error::from_syscall(ret as i64).map(|_| ()) } @@ -140,7 +139,7 @@ pub fn set_winsize(fd: Fd, ws: &Winsize) -> Result<(), Error> { pub fn get_winsize(fd: Fd) -> Result { let mut ws = Winsize::default(); let ret = unsafe { - raw::syscall3(SYS_IOCTL, fd.raw(), request::TIOCGWINSZ, &mut ws as *mut _ as u64) + raw::syscall3(nr::IOCTL, fd.raw(), request::TIOCGWINSZ, &mut ws as *mut _ as u64) }; Error::from_syscall(ret as i64).map(|_| ws) } @@ -166,7 +165,7 @@ pub fn tcgetpgrp(fd: Fd) -> Result { let mut pgrp: i32 = 0; let ret = unsafe { raw::syscall3( - SYS_IOCTL, + nr::IOCTL, fd.raw(), request::TIOCGPGRP, &mut pgrp as *mut i32 as u64, @@ -192,7 +191,7 @@ pub fn tcsetpgrp(fd: Fd, pgrp: i32) -> Result<(), Error> { let pgrp_val = pgrp; let ret = unsafe { raw::syscall3( - SYS_IOCTL, + nr::IOCTL, fd.raw(), request::TIOCSPGRP, &pgrp_val as *const i32 as u64, diff --git a/libs/libbreenix/src/time.rs b/libs/libbreenix/src/time.rs index fdf689b7..4f6a3824 100644 --- a/libs/libbreenix/src/time.rs +++ b/libs/libbreenix/src/time.rs @@ -34,14 +34,6 @@ pub fn clock_gettime(clock_id: u32, ts: &mut Timespec) -> Result<(), Error> { Error::from_syscall(ret as i64).map(|_| ()) } -/// Get the monotonic time since boot (deprecated, use clock_gettime). -/// -/// Returns time in milliseconds. -#[inline] -#[deprecated(note = "Use clock_gettime with CLOCK_MONOTONIC for better precision")] -pub fn get_time_ms() -> u64 { - unsafe { raw::syscall0(nr::GET_TIME) } -} /// Get current wall-clock (real) time. 
/// diff --git a/userspace/c-programs/Makefile b/userspace/c-programs/Makefile new file mode 100644 index 00000000..491a2a25 --- /dev/null +++ b/userspace/c-programs/Makefile @@ -0,0 +1,64 @@ +# Build C programs with musl libc for Breenix (aarch64) +# +# Prerequisites: +# - Homebrew LLVM (clang, llvm-ar, llvm-ranlib) at /opt/homebrew/opt/llvm/bin/ +# - LLD linker at /opt/homebrew/Cellar/lld@20/20.1.8/bin/ld.lld +# - musl libc installed at third-party/musl-install/ +# - compiler-rt builtins at third-party/compiler-rt-builtins/ +# +# Build: +# make # build hello_musl.elf +# make install # copy to userspace/programs/aarch64/ +# make clean + +BREENIX_ROOT := $(shell cd ../.. && pwd) + +# Toolchain +LLVM := /opt/homebrew/opt/llvm/bin +CC := $(LLVM)/clang +AR := $(LLVM)/llvm-ar +LLD := /opt/homebrew/Cellar/lld@20/20.1.8/bin/ld.lld +CLANG_INCLUDE := $(shell $(CC) --target=aarch64-linux-musl -print-resource-dir)/include + +# musl libc +MUSL_INSTALL := $(BREENIX_ROOT)/third-party/musl-install +MUSL_LIB := $(MUSL_INSTALL)/lib +MUSL_INCLUDE := $(MUSL_INSTALL)/include + +# compiler-rt builtins (for 128-bit float operations) +RT_LIB := $(BREENIX_ROOT)/third-party/compiler-rt-builtins + +# Breenix linker script +LINKER_SCRIPT := $(BREENIX_ROOT)/userspace/programs/linker-aarch64-musl.ld + +# Compiler flags +CFLAGS := --target=aarch64-linux-musl -ffreestanding -O2 -nostdinc -isystem $(MUSL_INCLUDE) + +# Linker flags +LDFLAGS := -T $(LINKER_SCRIPT) --static \ + $(MUSL_LIB)/crt1.o $(MUSL_LIB)/crti.o \ + -L$(MUSL_LIB) -lc \ + -L$(RT_LIB) -lcompiler_rt_builtins \ + $(MUSL_LIB)/crtn.o + +# Output directory +AARCH64_DIR := $(BREENIX_ROOT)/userspace/programs/aarch64 + +# Programs +PROGRAMS := hello_musl + +.PHONY: all install clean + +all: $(addsuffix .elf,$(PROGRAMS)) + +%.o: %.c + $(CC) $(CFLAGS) -c $< -o $@ + +hello_musl.elf: hello.o + $(LLD) $(LDFLAGS) $< -o $@ + +install: all + cp hello_musl.elf $(AARCH64_DIR)/hello_musl.elf + +clean: + rm -f *.o *.elf diff --git 
a/userspace/c-programs/hello.c b/userspace/c-programs/hello.c new file mode 100644 index 00000000..46273414 --- /dev/null +++ b/userspace/c-programs/hello.c @@ -0,0 +1,6 @@ +#include + +int main(int argc, char *argv[]) { + printf("Hello from musl libc on Breenix!\n"); + return 0; +} diff --git a/userspace/programs/linker-aarch64-musl.ld b/userspace/programs/linker-aarch64-musl.ld new file mode 100644 index 00000000..b35de9be --- /dev/null +++ b/userspace/programs/linker-aarch64-musl.ld @@ -0,0 +1,67 @@ +/* ARM64 Userspace linker script for musl libc programs on Breenix */ +ENTRY(_start) + +SECTIONS { + /* Start at 1GB (userspace area) - matches kernel MMU setup */ + . = 0x40000000; + + .text : ALIGN(4K) { + *(.text .text.*) + } + + .rodata : ALIGN(4K) { + *(.rodata .rodata.*) + } + + .init_array : ALIGN(8) { + __init_array_start = .; + *(.init_array .init_array.*) + __init_array_end = .; + } + + .fini_array : ALIGN(8) { + __fini_array_start = .; + *(.fini_array .fini_array.*) + __fini_array_end = .; + } + + /* Global Offset Table - needed for PIC code in libc */ + .got : ALIGN(8) { + *(.got .got.*) + } + + .got.plt : ALIGN(8) { + *(.got.plt) + } + + .data.rel.ro : ALIGN(4K) { + *(.data.rel.ro .data.rel.ro.*) + } + + .data : ALIGN(4K) { + *(.data .data.*) + } + + .bss : ALIGN(4K) { + *(.bss .bss.*) + *(COMMON) + } + + /* Thread-local storage */ + .tdata : ALIGN(8) { + *(.tdata .tdata.*) + } + + .tbss : ALIGN(8) { + *(.tbss .tbss.*) + } + + /* Stack allocated separately by kernel */ + + /DISCARD/ : { + *(.note.GNU-stack) + *(.comment) + *(.eh_frame) + *(.eh_frame_hdr) + } +} diff --git a/userspace/programs/src/hello_std_real.rs b/userspace/programs/src/hello_std_real.rs index b4c12495..320a39bd 100644 --- a/userspace/programs/src/hello_std_real.rs +++ b/userspace/programs/src/hello_std_real.rs @@ -58,11 +58,11 @@ fn main() { #[cfg(target_arch = "x86_64")] unsafe { core::arch::asm!( - "int 0x80", // SYS_exit(0) - Breenix Exit=0 + "int 0x80", // SYS_exit(0) - Linux 
x86_64 Exit=60 "2:", "pause", // Spin-loop hint (valid in Ring 3) "jmp 2b", - in("rax") 0u64, // SYS_EXIT = 0 in Breenix + in("rax") 60u64, // SYS_EXIT = 60 (Linux x86_64 ABI) in("rdi") 0u64, options(noreturn), ); @@ -70,11 +70,11 @@ fn main() { #[cfg(target_arch = "aarch64")] unsafe { core::arch::asm!( - "svc #0", // SYS_exit(0) - Breenix Exit=0 + "svc #0", // SYS_exit(0) "2:", "yield", // Spin-loop hint (ARM64 equivalent of PAUSE) "b 2b", - in("x8") 0u64, // SYS_EXIT = 0 in Breenix + in("x8") 93u64, // SYS_EXIT = 93 (Linux ARM64) in("x0") 0u64, options(noreturn), ); @@ -134,7 +134,7 @@ fn main() { #[cfg(target_arch = "aarch64")] core::arch::asm!( "svc #0", - in("x8") 56u64, // SYS_clone + in("x8") 220u64, // SYS_clone (Linux ARM64) inlateout("x0") flags as u64 => ret, in("x1") stack_top as u64, in("x2") child_fn as u64, @@ -159,11 +159,11 @@ fn main() { if tid_val == 0 { break; } - // Yield CPU: Breenix Yield=3 + // Yield CPU: Linux x86_64 sched_yield=24 #[cfg(target_arch = "x86_64")] - core::arch::asm!("int 0x80", in("rax") 3u64, options(nostack)); + core::arch::asm!("int 0x80", in("rax") 24u64, options(nostack)); #[cfg(target_arch = "aarch64")] - core::arch::asm!("svc #0", in("x8") 3u64, in("x0") 0u64, options(nostack)); + core::arch::asm!("svc #0", in("x8") 124u64, in("x0") 0u64, options(nostack)); // Print progress every 1M iterations if i > 0 && i % 1_000_000 == 0 { let digit = b'0' + (i / 1_000_000) as u8; diff --git a/userspace/programs/src/init.rs b/userspace/programs/src/init.rs index a7ea97c5..28c64bbf 100644 --- a/userspace/programs/src/init.rs +++ b/userspace/programs/src/init.rs @@ -15,7 +15,6 @@ use libbreenix::process::{fork, exec, waitpid, getpid, yield_now, ForkResult, WN const TELNETD_PATH: &[u8] = b"/sbin/telnetd\0"; const BLOGD_PATH: &[u8] = b"/sbin/blogd\0"; const BWM_PATH: &[u8] = b"/bin/bwm\0"; - /// Maximum number of rapid respawns before giving up on a service. 
const MAX_RESPAWN_FAILURES: u32 = 3; @@ -24,10 +23,13 @@ fn spawn(path: &[u8], name: &str) -> i64 { match fork() { Ok(ForkResult::Child) => { // Child: exec the binary - let _ = exec(path); - // exec failed - print!("[init] ERROR: exec failed for {}\n", name); - std::process::exit(127); + match exec(path) { + Ok(_) => unreachable!(), + Err(e) => { + print!("[init] ERROR: exec failed for {} ({})\n", name, e); + std::process::exit(127); + } + } } Ok(ForkResult::Parent(child_pid)) => { child_pid.raw() as i64 @@ -49,6 +51,31 @@ fn try_respawn(path: &[u8], name: &str, failures: &mut u32) -> i64 { spawn(path, name) } +/// Test: simple fork + exit + waitpid to exercise process lifecycle under SMP load. +fn test_fork_exit() { + match fork() { + Ok(ForkResult::Child) => { + // Child: just exit immediately + libbreenix::process::exit(127); + } + Ok(ForkResult::Parent(child_pid)) => { + let child_raw = child_pid.raw() as i32; + let mut status: i32 = 0; + match waitpid(child_raw, &mut status as *mut i32, 0) { + Ok(reaped) => { + print!("[init] TEST: child {} reaped, status={}\n", reaped.raw(), status); + } + Err(e) => { + print!("[init] TEST: waitpid(pid={}) failed: {}\n", child_raw, e); + } + } + } + Err(e) => { + print!("[init] TEST: fork failed: {}\n", e); + } + } +} + fn main() { let pid = getpid().map(|p| p.raw()).unwrap_or(0); print!("[init] Breenix init starting (PID {})\n", pid); @@ -69,6 +96,17 @@ fn main() { let mut bwm_pid = spawn(BWM_PATH, "bwm"); let mut bwm_failures: u32 = 0; + // Test: simple fork + exit + waitpid under SMP load (process lifecycle regression) + // Run after BWM is started so there's full SMP contention. + // Keep at 5 iterations — enough to stress-test without delaying BWM init + // past the strict boot test's 18-second detection window. 
+ for i in 0..5 { + print!("[init] TEST {}/5: fork+exit...\n", i + 1); + test_fork_exit(); + let _ = yield_now(); + } + print!("[init] TEST: all 5 iterations completed successfully\n"); + // Main loop: reap zombies and respawn crashed services. let mut status: i32 = 0; loop { diff --git a/userspace/programs/src/init_shell.rs b/userspace/programs/src/init_shell.rs index 19547f7c..1d54ffa3 100644 --- a/userspace/programs/src/init_shell.rs +++ b/userspace/programs/src/init_shell.rs @@ -65,8 +65,9 @@ const CSIZE: u32 = 0x0030; const CS8: u32 = 0x0030; const PARENB: u32 = 0x0100; -// Syscall numbers for calls not yet in libbreenix -const SYS_OPEN: u64 = 2; +// Use the canonical syscall number from libbreenix +use libbreenix::syscall::nr; +const AT_FDCWD: u64 = (-100i64) as u64; /// Raw execve wrapper using libbreenix syscall primitives. /// This replaces the `extern "C" { fn execve(...) }` FFI import. @@ -86,15 +87,17 @@ fn sys_execve(path: *const u8, argv: *const *const u8, envp: *const *const u8) - // ============================================================================ fn sys_open(path: *const u8, flags: i32) -> i64 { - unsafe { libbreenix::raw::syscall3(SYS_OPEN, path as u64, flags as u64, 0) as i64 } + unsafe { libbreenix::raw::syscall4(nr::OPENAT, AT_FDCWD, path as u64, flags as u64, 0) as i64 } } fn sys_access(pathname: *const u8, mode: i32) -> i32 { unsafe { - libbreenix::raw::syscall2( - libbreenix::syscall::nr::ACCESS, + libbreenix::raw::syscall4( + libbreenix::syscall::nr::FACCESSAT, + AT_FDCWD, pathname as u64, mode as u64, + 0, // flags ) as i32 } } diff --git a/userspace/programs/src/signal_regs_test.rs b/userspace/programs/src/signal_regs_test.rs index 89b860af..8c8e5654 100644 --- a/userspace/programs/src/signal_regs_test.rs +++ b/userspace/programs/src/signal_regs_test.rs @@ -228,16 +228,16 @@ fn main() { "mov x21, {e21}", "mov x22, {e22}", "mov x23, {e23}", - // kill(my_pid, SIGUSR1) - syscall 62 - "mov x8, 62", + // kill(my_pid, SIGUSR1) - 
syscall 129 (Linux ARM64) + "mov x8, 129", "mov x0, {pid}", "mov x1, 10", "svc #0", // Yield loop to allow signal delivery (100 iterations) - // sched_yield = syscall 3 + // sched_yield = syscall 124 (Linux ARM64) "mov x9, 100", "2:", - "mov x8, 3", + "mov x8, 124", "svc #0", "sub x9, x9, 1", "cbnz x9, 2b", diff --git a/userspace/programs/src/telnetd.rs b/userspace/programs/src/telnetd.rs index ca4f9fc9..e1ce5985 100644 --- a/userspace/programs/src/telnetd.rs +++ b/userspace/programs/src/telnetd.rs @@ -19,7 +19,8 @@ const SHELL_PATH: &[u8] = b"/bin/bsh\0"; // setsockopt constants (not yet in libbreenix) const SOL_SOCKET: i32 = 1; const SO_REUSEADDR: i32 = 2; -const SYS_SETSOCKOPT: u64 = 54; +use libbreenix::syscall::nr; +const SYS_SETSOCKOPT: u64 = nr::SETSOCKOPT; /// Set a file descriptor to non-blocking mode fn set_nonblocking(fd: Fd) {