Multicore Real-Time Operating System RTEMS (14) – BSP Initialization

Technical experience sharing, welcome to follow and provide guidance.

Before performing the boot_card initialization, the BSP hook is set up in advance. This article analyzes the code of bsp_start_hook_1.

A. AArch64 Start Set Vector Base

This writes the address of the exception vector table into the vector base address register VBAR_EL1. The symbol bsp_start_vector_table_begin that marks the table is defined as follows:

/*
 * AArch64 exception vector table (GNU as syntax, run through the C
 * preprocessor).
 *
 * The table is aligned to 0x800 and holds 16 entries, each aligned to 0x80:
 * {synchronous, IRQ, FIQ, SError} for each of
 *   - the current EL using SP0,
 *   - the current EL using the current SP (SPx),
 *   - a lower EL running AArch64,
 *   - a lower EL running AArch32.
 *
 * The two synchronous current-EL entries are written out in full.  They are
 * padded with NOPs so that the handler address stored at their end occupies
 * the last 8 bytes of the entry (offset 0x78), which is where the
 * "ldr x1, [x0, #0x78]" in the entry loads it from.
 *
 * Every other entry pushes x0 and lr, obtains its own address in lr with a
 * bl to the next instruction, and then expands JUMP_HANDLER followed by
 * JUMP_TARGET_SP0 or JUMP_TARGET_SPx.  Those macros are defined outside
 * this excerpt.
 */
bsp_start_vector_table_begin:
.balign 0x800
Vector_table_el3:
/*
 * The exception handler for synchronous exceptions from the current EL
 * using SP0.
 */
curr_el_sp0_sync:
    sub     sp, sp, #AARCH64_EXCEPTION_FRAME_SIZE                   /* reserve space for CEF */
    str     lr, [sp, #AARCH64_EXCEPTION_FRAME_REGISTER_LR_OFFSET]   /* shove lr into CEF */
    bl      .push_exception_context_start                           /* bl to CEF store routine */
/* Save original sp in x0 for .push_exception_context_finish */
    add     x0, sp, #AARCH64_EXCEPTION_FRAME_SIZE                   /* save original sp */
/* Push the remainder of the context */
    bl      .push_exception_context_finish
/* get jump target and branch/link */
    bl      curr_el_sp0_sync_get_pc     /* Get current execution address */
curr_el_sp0_sync_get_pc:                /* The current PC is now in LR */
    mov     x0, #0x7f                   /* Mask to use in BIC, lower 7 bits */
    bic     x0, lr, x0                  /* Mask LR to base of current vector */
    ldr     x1, [x0, #0x78]             /* Load target from last word in vector */
    and     lr, lr, #0x780              /* Mask off bits for vector number */
    lsr     lr, lr, #7                  /* Shift the vector bits down */
/* Store the vector */
    str     lr, [sp, #AARCH64_EXCEPTION_FRAME_REGISTER_VECTOR_OFFSET]
    mov     x0, sp
    blr     x1
    b       twiddle
/* Padding: keeps the handler address below at offset 0x78 of this entry */
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop
/* Takes up the space of 2 instructions */
#ifdef AARCH64_MULTILIB_ARCH_V8_ILP32
    .word _AArch64_Exception_default
    .word 0x0
#else
    .dword _AArch64_Exception_default
#endif
.balign 0x80
/* The exception handler for IRQ exceptions from the current EL using SP0. */
curr_el_sp0_irq:
    stp     x0, lr, [sp, #-0x10]!       /* Push x0,lr on to the stack */
    bl      curr_el_sp0_irq_get_pc      /* Get current execution address */
curr_el_sp0_irq_get_pc:                 /* The current PC is now in LR */
    JUMP_HANDLER
    JUMP_TARGET_SP0
.balign 0x80
/* The exception handler for FIQ exceptions from the current EL using SP0. */
curr_el_sp0_fiq:
    stp     x0, lr, [sp, #-0x10]!       /* Push x0,lr on to the stack */
    bl      curr_el_sp0_fiq_get_pc      /* Get current execution address */
curr_el_sp0_fiq_get_pc:                 /* The current PC is now in LR */
    JUMP_HANDLER
    JUMP_TARGET_SP0
.balign 0x80
/*
 * The exception handler for system error exceptions from the current EL using
 * SP0.
 */
curr_el_sp0_serror:
    stp     x0, lr, [sp, #-0x10]!       /* Push x0,lr on to the stack */
    bl      curr_el_sp0_serror_get_pc   /* Get current execution address */
curr_el_sp0_serror_get_pc:              /* The current PC is now in LR */
    JUMP_HANDLER
    JUMP_TARGET_SP0
.balign 0x80
/*
 * The exception handler for synchronous exceptions from the current EL using
 * the current SP.
 */
curr_el_spx_sync:
    msr     spsel, #0                                               /* switch to exception stack */
    sub     sp, sp, #AARCH64_EXCEPTION_FRAME_SIZE                   /* reserve space for CEF */
    str     lr, [sp, #AARCH64_EXCEPTION_FRAME_REGISTER_LR_OFFSET]   /* shove lr into CEF */
    bl      .push_exception_context_start                           /* bl to CEF store routine */
/* Save original sp in x0 for .push_exception_context_finish */
    msr     spsel, #1
    mov     x0, sp
    msr     spsel, #0
/* Push the remainder of the context */
    bl      .push_exception_context_finish
/* get jump target and branch/link */
    bl      curr_el_spx_sync_get_pc     /* Get current execution address */
curr_el_spx_sync_get_pc:                /* The current PC is now in LR */
    mov     x0, #0x7f                   /* Mask to use in BIC, lower 7 bits */
    bic     x0, lr, x0                  /* Mask LR to base of current vector */
    ldr     x1, [x0, #0x78]             /* Load target from last word in vector */
    and     lr, lr, #0x780              /* Mask off bits for vector number */
    lsr     lr, lr, #7                  /* Shift the vector bits down */
/* Store the vector */
    str     lr, [sp, #AARCH64_EXCEPTION_FRAME_REGISTER_VECTOR_OFFSET]
    mov     x0, sp
    blr     x1
    b       twiddle
/* Padding: keeps the handler address below at offset 0x78 of this entry */
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop
/* Takes up the space of 2 instructions */
#ifdef AARCH64_MULTILIB_ARCH_V8_ILP32
    .word _AArch64_Exception_default
    .word 0x0
#else
    .dword _AArch64_Exception_default
#endif
.balign 0x80
/*
 * The exception handler for IRQ exceptions from the current EL using the
 * current SP.
 */
curr_el_spx_irq:
    stp     x0, lr, [sp, #-0x10]!       /* Push x0,lr on to the stack */
    bl      curr_el_spx_irq_get_pc      /* Get current execution address */
curr_el_spx_irq_get_pc:                 /* The current PC is now in LR */
    JUMP_HANDLER
    JUMP_TARGET_SPx
.balign 0x80
/*
 * The exception handler for FIQ exceptions from the current EL using the
 * current SP.
 */
curr_el_spx_fiq:
    stp     x0, lr, [sp, #-0x10]!       /* Push x0,lr on to the stack */
    bl      curr_el_spx_fiq_get_pc      /* Get current execution address */
curr_el_spx_fiq_get_pc:                 /* The current PC is now in LR */
    JUMP_HANDLER
    JUMP_TARGET_SPx
.balign 0x80
/*
 * The exception handler for system error exceptions from the current EL using
 * the current SP.
 */
curr_el_spx_serror:
    stp     x0, lr, [sp, #-0x10]!       /* Push x0,lr on to the stack */
    bl      curr_el_spx_serror_get_pc   /* Get current execution address */
curr_el_spx_serror_get_pc:              /* The current PC is now in LR */
    JUMP_HANDLER
    JUMP_TARGET_SPx
.balign 0x80
/*
 * The exception handler for synchronous exceptions from a lower EL (AArch64).
 */
lower_el_aarch64_sync:
    stp     x0, lr, [sp, #-0x10]!           /* Push x0,lr on to the stack */
    bl      lower_el_aarch64_sync_get_pc    /* Get current execution address */
lower_el_aarch64_sync_get_pc:               /* The current PC is now in LR */
    JUMP_HANDLER
    JUMP_TARGET_SPx
.balign 0x80
/* The exception handler for IRQ exceptions from a lower EL (AArch64). */
lower_el_aarch64_irq:
    stp     x0, lr, [sp, #-0x10]!           /* Push x0,lr on to the stack */
    bl      lower_el_aarch64_irq_get_pc     /* Get current execution address */
lower_el_aarch64_irq_get_pc:                /* The current PC is now in LR */
    JUMP_HANDLER
    JUMP_TARGET_SPx
.balign 0x80
/* The exception handler for FIQ exceptions from a lower EL (AArch64). */
lower_el_aarch64_fiq:
    stp     x0, lr, [sp, #-0x10]!           /* Push x0,lr on to the stack */
    bl      lower_el_aarch64_fiq_get_pc     /* Get current execution address */
lower_el_aarch64_fiq_get_pc:                /* The current PC is now in LR */
    JUMP_HANDLER
    JUMP_TARGET_SPx
.balign 0x80
/*
 * The exception handler for system error exceptions from a lower EL(AArch64).
 */
lower_el_aarch64_serror:
/* Push x0,lr on to the stack */
    stp     x0, lr, [sp, #-0x10]!
/* Get current execution address */
    bl      lower_el_aarch64_serror_get_pc
lower_el_aarch64_serror_get_pc:             /* The current PC is now in LR */
    JUMP_HANDLER
    JUMP_TARGET_SPx
.balign 0x80
/*
 * The exception handler for the synchronous exception from a lower EL(AArch32).
 */
lower_el_aarch32_sync:
    stp     x0, lr, [sp, #-0x10]!           /* Push x0,lr on to the stack */
    bl      lower_el_aarch32_sync_get_pc    /* Get current execution address */
lower_el_aarch32_sync_get_pc:               /* The current PC is now in LR */
    JUMP_HANDLER
    JUMP_TARGET_SPx
.balign 0x80
/* The exception handler for the IRQ exception from a lower EL (AArch32). */
lower_el_aarch32_irq:
    stp     x0, lr, [sp, #-0x10]!           /* Push x0,lr on to the stack */
    bl      lower_el_aarch32_irq_get_pc     /* Get current execution address */
lower_el_aarch32_irq_get_pc:                /* The current PC is now in LR */
    JUMP_HANDLER
    JUMP_TARGET_SPx
.balign 0x80
/* The exception handler for the FIQ exception from a lower EL (AArch32). */
lower_el_aarch32_fiq:
    stp     x0, lr, [sp, #-0x10]!           /* Push x0,lr on to the stack */
    bl      lower_el_aarch32_fiq_get_pc     /* Get current execution address */
lower_el_aarch32_fiq_get_pc:                /* The current PC is now in LR */
    JUMP_HANDLER
    JUMP_TARGET_SPx
.balign 0x80
/*
 * The exception handler for the system error exception from a lower EL
 * (AArch32).
 */
lower_el_aarch32_serror:
/* Push x0,lr on to the stack */
    stp     x0, lr, [sp, #-0x10]!
/* Get current execution address */
    bl      lower_el_aarch32_serror_get_pc
lower_el_aarch32_serror_get_pc:             /* The current PC is now in LR */
    JUMP_HANDLER
    JUMP_TARGET_SPx

bsp_start_vector_table_end:

As can be seen, this is a predefined exception vector table. The table as a whole is aligned to 0x800, and each of its 16 vector entries is aligned to 0x80.

B. _CPU_SMP_Get_current_processor

This retrieves the current processor index, as follows:

/*
 * Returns the index of the current processor: the current per-CPU control
 * is obtained from _CPU_Get_current_per_CPU_control() and converted to an
 * index by _Per_CPU_Get_index().
 */
uint32_t _CPU_SMP_Get_current_processor( void )
{
  return _Per_CPU_Get_index(
    _CPU_Get_current_per_CPU_control()
  );
}

For _CPU_Get_current_per_CPU_control, the AArch64 implementation, _AARCH64_Get_current_per_CPU_control, is as follows:

/*
 * Reads the EL1 Thread ID Register (TPIDR_EL1) and returns its contents as
 * a pointer to the current per-CPU control.
 */
static inline
struct Per_CPU_Control *_AARCH64_Get_current_per_CPU_control( void )
{
  uint64_t tpidr;

  __asm__ volatile (
    "mrs %0, TPIDR_EL1" : "=&r" ( tpidr ) : : "memory"
  );

  /* The register value is the pointer itself; convert via uintptr_t. */
  return (struct Per_CPU_Control *)(uintptr_t) tpidr;
}

This retrieves the value of the TPIDR_EL1 register.

As for _Per_CPU_Get_index, the implementation is as follows:

/*
 * Returns the index that corresponds to the given per-CPU control.
 *
 * With RTEMS_SMP defined, cpu is treated as a pointer to a
 * Per_CPU_Control_envelope and the index is its distance, in envelope
 * elements, from &_Per_CPU_Information[ 0 ].  Without RTEMS_SMP the result
 * is always 0 and cpu is unused.
 */
static inline uint32_t _Per_CPU_Get_index( const Per_CPU_Control *cpu )
{
#if defined(RTEMS_SMP)
  const Per_CPU_Control_envelope *per_cpu_envelope =
    ( const Per_CPU_Control_envelope * ) cpu;

  /* Pointer difference counts whole envelopes, i.e. the array index. */
  return ( uint32_t ) ( per_cpu_envelope - &_Per_CPU_Information[ 0 ] );
#else
  (void) cpu;

  return 0;
#endif
}

This takes cpu_self and subtracts &_Per_CPU_Information[ 0 ], which is the address of _Per_CPU_Information. We can see the following code:

/*
 * TPIDR_EL1 = _Per_CPU_Information + ( x0 << PER_CPU_CONTROL_SIZE_LOG2 ),
 * where x0 holds the processor index.
 */
ldr x1, =_Per_CPU_Information
add x1, x1, x0, lsl #PER_CPU_CONTROL_SIZE_LOG2 /* the shift amount is 10 here */
msr TPIDR_EL1, x1

Here x0 is obtained from mpidr_el1.

mpidr_el1 was covered in the introduction to the AArch64 registers; masking it with 0xff extracts affinity level 0 of the CPU. The code is as follows:

FUNCTION_ENTRY(_AArch64_Get_current_processor_for_system_start)

    /*
     * The result in x0 is affinity level 0 of this processor, taken from
     * the low 8 bits of MPIDR_EL1.
     */
    mrs     x0, mpidr_el1
    and     x0, x0, #0xff
    ret

FUNCTION_END(_AArch64_Get_current_processor_for_system_start)

Here x0 is 0, for:

add x1, x1, x0, lsl #10 /* x1 = x1 + ( x0 << 10 ) */

It can be seen that x1 = x1 + (x0 << 10). Since x0 is 0, x1 is unchanged and still holds the address of _Per_CPU_Information.

Thus, _CPU_SMP_Get_current_processor returns 0.

C. zynqmp_setup_mmu_and_cache

First, note the aarch64_mmu_setup, the code is as follows:

/*
 * Programs the two EL1 registers that the translation setup depends on:
 * TCR_EL1 (translation control) and MAIR_EL1 (memory attribute
 * indirection), each through its _AArch64_Write_*() accessor.
 */
BSP_START_TEXT_SECTION static inline void aarch64_mmu_setup( void )
{
  /*
   * TCR: T0SZ = 0x10 leaves 64 - 0x10 = 48 address bits, i.e. 256TB
   * mappable.
   */
  _AArch64_Write_tcr_el1(
    AARCH64_TCR_EL1_T0SZ( 0x10 ) |
    AARCH64_TCR_EL1_IRGN0( 0x1 ) |
    AARCH64_TCR_EL1_ORGN0( 0x1 ) |
    AARCH64_TCR_EL1_SH0( 0x3 ) |
    AARCH64_TCR_EL1_TG0( 0x0 ) |
    AARCH64_TCR_EL1_IPS( 0x5ULL ) |
    AARCH64_TCR_EL1_EPD1
  );

  /*
   * MAIR: one attribute encoding per index.
   *   Attr0 = 0x00  Device-nGnRnE
   *   Attr1 = 0x04  Device-nGnRE
   *   Attr2 = 0x44  Normal, Inner+Outer Non-cacheable
   *   Attr3 = 0xFF  Normal, Inner+Outer Write-Back Non-transient,
   *                 Read-Allocate and Write-Allocate
   */
  _AArch64_Write_mair_el1(
    AARCH64_MAIR_EL1_ATTR0( 0x0 ) |
    AARCH64_MAIR_EL1_ATTR1( 0x4 ) |
    AARCH64_MAIR_EL1_ATTR2( 0x44 ) |
    AARCH64_MAIR_EL1_ATTR3( 0xFF )
  );
}

The TCR can be viewed in the introduction to the Aarch64 TCR register.

The mair_el1 memory attribute register can be viewed in the introduction to the Aarch64 MAIR register.

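We can see that the value read back from mair_el1 is 0xffffffffff440400. The low 32 bits, 0xff440400, are exactly the four attribute bytes written above (Attr3 = 0xFF, Attr2 = 0x44, Attr1 = 0x04, Attr0 = 0x00). The code does not set the upper 32 bits explicitly; they are presumably the result of sign extension when the int expression 0xFF << 24 is widened to 64 bits (to be confirmed against the macro definition). The four attributes decode as follows: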

AARCH64_MAIR_EL1_ATTR0( 0x0 ) : Device-nGnRnE

AARCH64_MAIR_EL1_ATTR1( 0x4 ) : Device-nGnRE
AARCH64_MAIR_EL1_ATTR2( 0x44 ) : Normal-Inner+Outer Non-cacheable

AARCH64_MAIR_EL1_ATTR3( 0xFF ) : Normal-Inner+Outer  Write-Back Non-transient  Inner+Outer Read-Allocate, Inner+Outer Write-Allocate.

For the aarch64_mmu_setup_translation_table function, the implementation is as follows:

/*
 * Builds the translation table described by a configuration table.
 *
 * The table at control->ttb is first filled by
 * aarch64_mmu_page_table_set_blocks() with a base of 0, a bit offset of
 * MMU_MAX_SUBTABLE_PAGE_BITS and default attributes of 0.  Each of the
 * config_count entries of config_table is then applied with
 * aarch64_mmu_set_translation_table_entries().
 *
 * control      - MMU control structure; supplies the table base (ttb).
 * config_table - array of memory section descriptions to map.
 * config_count - number of entries in config_table.
 *
 * If any entry cannot be applied, bsp_fatal() is called with
 * AARCH64_FATAL_MMU_CANNOT_MAP_BLOCK.
 */
BSP_START_TEXT_SECTION void aarch64_mmu_setup_translation_table(
  aarch64_mmu_control *control,
  const aarch64_mmu_config_entry *config_table,
  size_t config_count
)
{
  size_t i;

  aarch64_mmu_page_table_set_blocks(
    control->ttb,
    (uintptr_t) NULL,
    MMU_MAX_SUBTABLE_PAGE_BITS,
    0
  );

  /* Configure entries required for each memory section */
  for ( i = 0; i < config_count; ++i ) {
    rtems_status_code sc;

    sc = aarch64_mmu_set_translation_table_entries( control, &config_table[i] );

    if ( sc != RTEMS_SUCCESSFUL ) {
      bsp_fatal( AARCH64_FATAL_MMU_CANNOT_MAP_BLOCK );
    }
  }
}

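The call to aarch64_mmu_page_table_set_blocks initializes the first-level page table for the MMU. Inside that function the parameters and local variables have the following values (bit_offset below is the bits_offset parameter):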

page_table: ttb
base: 0
bit_offset: 39
page_flag: 0
default_attr: 0
MMU_BITS_PER_LEVEL: Each level of the page table occupies 9 bits

For the loop ( uint64_t i = 0; i < ( 1 << MMU_BITS_PER_LEVEL ); i++ )

Here i ranges from 0 to 511 (bits=9).

For page_table[i] = base | ( i << bits_offset );

base | ( i << 39 ) places each of the 512 page table indices in bits 39-47 of the entry.

page_table[i] |= default_attr | page_flag; sets the default attributes and flags for the first-level page table.

Query the physical address range set in the ID_AA64MMFR0_EL1 register.

We note parange.

In the code, the PARange field read from the ID register is 0b0010, which selects a 40-bit physical address range. Thus, max_mappable is 1 << 40, which is 0x10000000000 (1 TB).

3.2 aarch64_mmu_map_block

Here we call aarch64_mmu_map_block, and we check as follows:

/*
 * Map the range described by begin and size with the flags of the config
 * entry, starting at the top-level table (root address 0x0, level -1).
 */
return aarch64_mmu_map_block(
  control,
  control->ttb,
  0x0,
  begin,
  size,
  -1,
  config->flags
);

The parameter parsing is as follows:

control: aarch64_mmu_instance
ttb: bsp_translation_table_base
root_address: 0
addr: aarch64_mmu_config_table->begin
size: aarch64_mmu_config_table size
level: -1
flags: aarch64_mmu_config_table->flags

It is worth noting that aarch64_mmu_map_block calls itself recursively to build the page table mapping, level by level, until it reaches the PTE that holds the actual physical address, as follows:

page_table[index] = addr | flags | page_flag;

Here, regarding the query steps from ttbrx to the physical address, you can refer to my other articles, and I will not elaborate here.

3.3 aarch64_mmu_enable

Here we mainly focus on the following registers:

TTBR0_EL1
SCTLR_EL1

For TTBR0_EL1, this writes the ttb into ttbr0_el1.

For SCTLR_EL1, the following bitwise operations are performed:

sctlr |= AARCH64_SCTLR_EL1_I | AARCH64_SCTLR_EL1_C | AARCH64_SCTLR_EL1_M;

D. bsp_start_clear_bss

This directly uses memset to clear the BSS segment, as follows:

/*
 * Zero-fills the BSS section: bsp_section_bss_size bytes starting at
 * bsp_section_bss_begin are set to 0 with memset().
 */
BSP_START_TEXT_SECTION static inline void bsp_start_clear_bss(void)
{
  const size_t bss_bytes = (size_t) bsp_section_bss_size;

  memset(bsp_section_bss_begin, 0, bss_bytes);
}

E. Conclusion

This completes the walkthrough of what bsp_start_hook_1 does before boot_card is entered.

Leave a Comment