Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion include/l4/ipc.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,13 @@
#define IPC_TI_MAP_GRANT 0x8
#define IPC_TI_GRANT 0x2

/* Total message-register (MR) capacity, including the short message buffer:
 *   MR0-MR7:   8 words held in registers (ctx.regs[0-7])
 *   MR8-MR39:  32 words held in msg_buffer[0-31]
 *   MR40-MR47: 8 words held in the UTCB (utcb->mr[0-7])
 * Total: 48 MRs = 192 bytes.
 *
 * This must be the only definition of IPC_MR_COUNT; defining it a second
 * time with a different value is an incompatible macro redefinition.
 */
#define IPC_MR_COUNT 48

typedef union {
struct {
Expand Down
11 changes: 9 additions & 2 deletions include/l4/utcb.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,15 @@ struct utcb {
uint32_t thread_word_1;
uint32_t thread_word_2;
/* +12w */
uint32_t mr[8]; /* MRs 8-15 (0-8 are laying in
r4..r11 [thread's context]) */
/* Message Registers (MR) mapping with short message buffer:
* MR0-MR7: Hardware registers R4-R11 (ctx.regs[0-7]) - 32 bytes
* MR8-MR39: Short message buffer (tcb->msg_buffer[0-31]) - 128 bytes
* MR40-MR47: UTCB overflow (mr[0-7]) - 32 bytes
*
* Total message capacity: 192 bytes (48 words)
* Fastpath capacity: 160 bytes (40 words, MR0-MR39)
*/
uint32_t mr[8]; /* MRs 40-47 (overflow beyond short buffer) */
/* +20w */
uint32_t br[8];
/* +28w */
Expand Down
204 changes: 204 additions & 0 deletions include/platform/ipc-fastpath.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
/* Copyright (c) 2026 The F9 Microkernel Project. All rights reserved.
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/

#ifndef PLATFORM_IPC_FASTPATH_H_
#define PLATFORM_IPC_FASTPATH_H_

#include <debug.h>
#include <ipc.h>
#include <platform/armv7m.h>
#include <platform/irq.h>
#include <sched.h>
#include <syscall.h>
#include <thread.h>
#include <types.h>

/**
* Fastpath IPC optimization for ARM Cortex-M.
*
* Bypasses softirq scheduling for simple short-message IPC by performing
* direct register transfer in the SVC handler context.
*
* CRITICAL: Message registers (MR0-MR7) are in R4-R11, NOT R0-R7.
* - R0-R3: Syscall parameters (to_tid, from_tid, timeout, unused)
* - R4-R11: Message registers (MR0-MR7) via ctx.regs[0-7]
*
* The naked SVC_HANDLER wrapper (platform/irq.h) saves R4-R11 into
* __irq_saved_regs before the C handler runs, so ipc_try_fastpath() reads
* MR0-MR7 from that array instead of from the live registers.
*
* Implementation is in header as static inline for zero call overhead.
*/

/**
 * ipc_fastpath_copy_mrs() - Transfer a short message to the receiver
 * @saved_mrs: Snapshot of the sender's R4-R11, i.e. MR0-MR7
 * @sender: Thread whose msg_buffer supplies MR8 and above
 * @receiver: Thread that receives the message
 * @n_untyped: Untyped word count taken from the message tag
 *
 * The message is n_untyped + 1 words long because MR0 carries the tag.
 *  - Words 0-7 go from @saved_mrs into receiver->ctx.regs[0-7].
 *  - Words 8 and up go from sender->msg_buffer into receiver->msg_buffer,
 *    limited to the 32 entries that buffer holds.
 *
 * Nothing is copied when n_untyped + 1 is zero or negative.
 */
static inline void ipc_fastpath_copy_mrs(volatile uint32_t *saved_mrs,
                                         struct tcb *sender,
                                         struct tcb *receiver,
                                         int n_untyped)
{
    const int regs_capacity = 8;  /* MR0-MR7 live in registers */
    const int buf_capacity = 32;  /* MR8-MR39 live in msg_buffer */
    const int total = n_untyped + 1; /* tag word plus untyped words */
    int from_regs = total;
    int from_buf = 0;
    int w;

    /* Split the message into its register part and its buffer part. */
    if (from_regs > regs_capacity)
        from_regs = regs_capacity;

    if (total > regs_capacity) {
        from_buf = total - regs_capacity;
        if (from_buf > buf_capacity)
            from_buf = buf_capacity;
    }

    /* Register part: saved R4-R11 into the receiver's saved context. */
    for (w = 0; w < from_regs; w++)
        receiver->ctx.regs[w] = saved_mrs[w];

    /* Buffer part: TCB buffer to TCB buffer. */
    for (w = 0; w < from_buf; w++)
        receiver->msg_buffer[w] = sender->msg_buffer[w];
}

/**
 * ipc_fastpath_helper() - Attempt a direct sender-to-receiver IPC transfer
 * @caller: Thread that issued the IPC system call
 * @svc_param: Stacked exception frame; REG_R0 holds the destination thread
 *             id and REG_R1 the receive-from thread id
 * @saved_mrs: Snapshot of R4-R11 (MR0-MR7); element 0 is the message tag
 *
 * Runs a series of eligibility checks and, only if every one passes,
 * delivers the message and makes both threads runnable. No thread state is
 * modified when a check fails.
 *
 * Return: 1 when the message was delivered here, 0 when the caller must use
 * the regular IPC path instead.
 */
static inline int ipc_fastpath_helper(struct tcb *caller,
                                      uint32_t *svc_param,
                                      volatile uint32_t *saved_mrs)
{
    extern void sched_dequeue(struct tcb *);

    struct tcb *receiver;
    uint32_t *receiver_frame;
    l4_thread_t dest_id, recv_from_id;
    ipc_msg_tag_t msg_tag;

    /* Syscall arguments come from the stacked frame, the tag from MR0. */
    dest_id = svc_param[REG_R0];
    recv_from_id = svc_param[REG_R1];
    msg_tag.raw = saved_mrs[0];

    /* ---- Eligibility checks: any failure means "use the slowpath" ---- */

    /* Must have a destination ... */
    if (dest_id == L4_NILTHREAD)
        return 0;

    /* ... and no receive phase (pure send only). */
    if (recv_from_id != L4_NILTHREAD)
        return 0;

    /* Typed items are never handled here. */
    if (msg_tag.s.n_typed != 0)
        return 0;

    /* Message must fit in MR0-MR39 (8 register words + 32 buffer words). */
    if (msg_tag.s.n_untyped > 39)
        return 0;

    /* Destination must exist and already be blocked in receive. */
    receiver = thread_by_globalid(dest_id);
    if (!receiver)
        return 0;
    if (receiver->state != T_RECV_BLOCKED)
        return 0;

    /* Destination must accept this sender: any thread, or exactly us. */
    if (!(receiver->ipc_from == L4_ANYTHREAD ||
          receiver->ipc_from == caller->t_globalid))
        return 0;

    /* Messages to the log and IRQ-request threads are left to the
     * slowpath.
     */
    if (dest_id == TID_TO_GLOBALID(THREAD_LOG))
        return 0;
    if (dest_id == TID_TO_GLOBALID(THREAD_IRQ_REQUEST))
        return 0;

    /* Tag value used by the thread start protocol: slowpath only. */
    if (msg_tag.raw == 0x00000005)
        return 0;

    /* ---- Delivery: the statement order below is significant ---- */

    /* Take the caller off the scheduler; it is enqueued again below. */
    sched_dequeue(caller);

    /* Move MR0-MR7 from the snapshot and MR8+ from the sender's buffer. */
    ipc_fastpath_copy_mrs(saved_mrs, caller, receiver, msg_tag.s.n_untyped);

    /* Report the sender's id in the receiver's stacked R0 and its UTCB. */
    receiver_frame = (uint32_t *) receiver->ctx.sp;
    receiver_frame[REG_R0] = caller->t_globalid;
    receiver->utcb->sender = caller->t_globalid;

    /* Clear the timeout event field of both threads. */
    caller->timeout_event = 0;
    receiver->timeout_event = 0;

    /* Wake the receiver at SCHED_PRIO_IPC. */
    receiver->state = T_RUNNABLE;
    receiver->ipc_from = L4_NILTHREAD;
    sched_set_priority(receiver, SCHED_PRIO_IPC);
    sched_enqueue(receiver);

    /* A pure send has nothing to wait for, so the caller stays runnable;
     * put it back on the scheduler after the dequeue above.
     */
    caller->state = T_RUNNABLE;
    sched_enqueue(caller);

    /* Ask for a reschedule instead of switching context from here. */
    request_schedule();

    return 1;
}

/**
 * ipc_try_fastpath() - Entry point for the IPC fastpath
 * @caller: Thread that issued the IPC system call
 * @svc_param: Stacked exception frame holding the syscall arguments
 *
 * Passes the global __irq_saved_regs array to ipc_fastpath_helper() as the
 * MR0-MR7 snapshot. The array is expected to have been filled by the SVC
 * entry wrapper before this function is reached.
 *
 * Return: 1 if the message was delivered by the fastpath (skip the
 * slowpath), 0 if the caller must fall back to the slowpath.
 *
 * The fastpath applies only to a pure send (valid destination, no receive
 * phase) with no typed items and at most 39 untyped words, to a receiver
 * that is T_RECV_BLOCKED and waiting for this caller or for any thread.
 */
static inline int ipc_try_fastpath(struct tcb *caller, uint32_t *svc_param)
{
    extern volatile uint32_t __irq_saved_regs[8];
    volatile uint32_t *const mr_snapshot = __irq_saved_regs;

    return ipc_fastpath_helper(caller, svc_param, mr_snapshot);
}

#endif /* PLATFORM_IPC_FASTPATH_H_ */
32 changes: 32 additions & 0 deletions include/platform/irq.h
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,38 @@ extern volatile uint32_t __irq_saved_regs[8];
request_schedule(); \
irq_return(); \
}

/*
 * SVC_HANDLER - Define a naked exception entry point `name` for SVC that
 * wraps the C handler `sub`.
 *
 * The generated function performs, in this order:
 *   1. irq_enter()
 *   2. irq_save_regs_only()  - intended to store R4-R11 into
 *                              __irq_saved_regs before any C handler code
 *                              runs (helper is defined elsewhere in this
 *                              header)
 *   3. sub()                 - the C handler
 *   4. request_schedule()
 *   5. reload R4-R11 from __irq_saved_regs (inline asm; R0 is used as the
 *      scratch base pointer)
 *   6. irq_return()
 *
 * The snapshot taken in step 2 is what lets the IPC fastpath read the user
 * message registers MR0-MR7 (held in R4-R11) unmodified. Step 5 exists
 * because `sub` may clobber R4-R11 and a naked function has no
 * compiler-generated epilogue to restore them.
 *
 * NOTE(review): the restore uses extended asm (clobber list) inside a
 * __NAKED function. GCC documents only basic asm as supported in naked
 * functions - confirm this is reliable with the project's toolchain.
 *
 * NOTE(review): __irq_saved_regs is a single global array. If any other
 * handler that writes it can preempt this one between steps 2 and 5, the
 * restore would load that handler's values - confirm against the
 * exception priority configuration.
 *
 * Usage: SVC_HANDLER(svc_handler, __svc_handler);
 */
#define SVC_HANDLER(name, sub) \
void name(void) __NAKED; \
void name(void) \
{ \
irq_enter(); \
irq_save_regs_only(); \
sub(); \
request_schedule(); \
/* Restore R4-R11 before returning */ \
__asm__ __volatile__( \
"ldr r0, =__irq_saved_regs\n\t" \
"ldm r0, {r4-r11}" :: \
: "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", \
"memory"); \
irq_return(); \
}
extern volatile tcb_t *current;

#endif /* PLATFORM_IRQ_H_ */
16 changes: 16 additions & 0 deletions include/thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,22 @@ struct tcb {
uint8_t notify_pending;

uint8_t _notify_pad[1]; /* Alignment padding */

/* Short message buffer for IPC fastpath optimization.
* Extends fastpath coverage from 32 bytes (registers only) to 160 bytes.
* Maps to L4 virtual registers MR8-MR39 (32 additional 4-byte registers).
*
* Memory layout:
* - MR0-MR7: ctx.regs[0-7] (32 bytes, R4-R11 hardware registers)
* - MR8-MR39: msg_buffer[0-31] (128 bytes, TCB-embedded buffer)
* - MR40-MR47: utcb->mr[0-7] (32 bytes, UTCB overflow)
*
* Fastpath eligibility: n_untyped <= 39 (160 bytes total)
* Expected fastpath coverage: 70% → 95%
*
* RAM impact: 128 bytes × 20 threads = 2.5 KB (1.3% of 192KB)
*/
uint32_t msg_buffer[32];
};
typedef struct tcb tcb_t;

Expand Down
52 changes: 34 additions & 18 deletions kernel/ipc.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,19 +35,33 @@ static inline void thread_make_runnable(tcb_t *thr)
sched_enqueue(thr);
}

/* Read message register @i of thread @from.
 *
 * Storage is selected by index:
 *   MR0-MR7:   from->ctx.regs[0-7]      (saved R4-R11)
 *   MR8-MR39:  from->msg_buffer[0-31]   (short message buffer in the TCB)
 *   MR40-MR47: from->utcb->mr[0-7]      (UTCB overflow)
 *
 * Only this three-tier mapping may be used. The older two-tier form
 * (utcb->mr[i - 8] for every i >= 8) runs past the 8-entry utcb->mr array
 * for any index of 16 or more.
 *
 * No range check is performed: the caller must pass 0 <= i < 48.
 */
uint32_t ipc_read_mr(tcb_t *from, int i)
{
    if (i < 8)
        return from->ctx.regs[i];
    if (i < 40)
        return from->msg_buffer[i - 8];
    return from->utcb->mr[i - 40];
}

/* Write @data into message register @i of thread @to.
 *
 * Storage is selected by index:
 *   MR0-MR7:   to->ctx.regs[0-7]      (saved R4-R11)
 *   MR8-MR39:  to->msg_buffer[0-31]   (short message buffer in the TCB)
 *   MR40-MR47: to->utcb->mr[0-7]      (UTCB overflow)
 *
 * Only this three-tier mapping may be used. Sending every index >= 8 to
 * utcb->mr[i - 8] would write past the 8-entry array for any index of 16
 * or more and would never reach msg_buffer.
 *
 * No range check is performed: the caller must pass 0 <= i < 48.
 */
void ipc_write_mr(tcb_t *to, int i, uint32_t data)
{
    if (i < 8)
        to->ctx.regs[i] = data;
    else if (i < 40)
        to->msg_buffer[i - 8] = data;
    else
        to->utcb->mr[i - 40] = data;
}

static void user_ipc_error(tcb_t *thr, enum user_error_t error)
Expand Down Expand Up @@ -351,19 +365,19 @@ void sys_ipc(uint32_t *param1)
} else if (to_thr && to_thr->state == T_INACTIVE &&
GLOBALID_TO_TID(to_thr->utcb->t_pager) ==
GLOBALID_TO_TID(caller->t_globalid)) {
if (ipc_read_mr(caller, 0) == 0x00000005) {
/* mr1: thread func, mr2: stack addr,
* mr3: stack size
* mr4: thread entry, mr5: thread args
* thread start protocol */
uint32_t tag = ipc_read_mr(caller, 0);
if (tag == 0x00000005) {
/* Thread start protocol from pager:
* mr1: thread_container (wrapper), mr2: sp,
* mr3: stack size, mr4: entry point, mr5: entry arg */

uint32_t mr1_container = ipc_read_mr(caller, 1);
memptr_t sp = ipc_read_mr(caller, 2);
size_t stack_size = ipc_read_mr(caller, 3);
uint32_t entry_point = ipc_read_mr(caller, 4);
uint32_t entry_arg = ipc_read_mr(caller, 5);
uint32_t regs[4]; /* r0, r1, r2, r3 */

dbg_printf(DL_IPC, "IPC: %t thread start sp:%p stack_size:%p\n",
to_tid, sp, stack_size);

/* Security check: Ensure stack is in user-writable memory */
int pid = mempool_search(sp - stack_size, stack_size);
mempool_t *mp = mempool_getbyid(pid);
Expand All @@ -387,10 +401,12 @@ void sys_ipc(uint32_t *param1)

regs[REG_R0] = (uint32_t) &kip;
regs[REG_R1] = (uint32_t) to_thr->utcb;
regs[REG_R2] = ipc_read_mr(caller, 4);
regs[REG_R3] = ipc_read_mr(caller, 5);
thread_init_ctx((void *) sp, (void *) ipc_read_mr(caller, 1),
regs, to_thr);
regs[REG_R2] =
entry_point; /* Actual entry passed to container */
regs[REG_R3] = entry_arg;

thread_init_ctx((void *) sp, (void *) mr1_container, regs,
to_thr);

thread_make_runnable(caller);

Expand Down
Loading
Loading