/* SPDX-License-Identifier: GPL-2.0+ */
/*
 * 64-bit x86 Startup Code with integrated 32-bit init
 *
 * Entry point _start is .code32, called from start16.S after the
 * 16-to-32-bit transition. This sets up an identity-mapped page table
 * and transitions to 64-bit mode before calling into the normal
 * board_init_f() flow.
 *
 * The 32-bit section uses position-independent code (call/pop for the
 * instruction pointer) because the 64-bit binary is linked as PIE.
 *
 * Syntax: GNU as, AT&T operand order (source, destination), run through
 * the C preprocessor.
 *
 * Copyright 2026 Canonical Ltd
 * Written by Simon Glass <simon.glass@canonical.com>
 */

#include <config.h>
#include <asm/msr-index.h>
#include <asm/processor.h>
#include <asm/processor-flags.h>

/*
 * Page-table base address - must be 4KB aligned and below 4GB.
 * Uses 24KB total: PML4 (4KB) + PDPT (4KB) + 4 PD tables (4KB each)
 */
#define PT_BASE		0x80000

/* RAM address that the boot GDT is copied to before it is loaded */
#define GDT_RAM		0x2000

/* ------------------------------------------------------------------ */
.section .text.start
.code32
.globl _start
.type _start, @function
_start:
	/* Load the segment registers to match the GDT loaded in start16.S */
	movl	$(X86_GDT_ENTRY_32BIT_DS * X86_GDT_ENTRY_SIZE), %eax
	movl	%eax, %ds
	movl	%eax, %es
	movl	%eax, %gs
	movl	%eax, %ss

	/* Set up the stack in the CAR/SRAM region */
	movl	$(CONFIG_SYS_CAR_ADDR + CONFIG_SYS_CAR_SIZE - 4), %esp

	/*
	 * Clear IDT. lidt reads a 6-byte pseudo-descriptor: a 16-bit limit
	 * at offset 0 followed by a 32-bit base at offset 2. All of it is
	 * written as zero here.
	 */
	subl	$8, %esp
	movl	$0, 4(%esp)	/* base[31:16] (and 2 unused bytes) */
	movw	$0, 2(%esp)	/* base[15:0] */
	movw	$0, (%esp)	/* limit = 0 */
	lidt	(%esp)
	addl	$8, %esp

	/*
	 * Get our runtime address into %ebx so we can reference data
	 * position-independently (the 64-bit binary is linked as PIE).
	 * %ebx holds the runtime address of .Lpic_base from here until the
	 * far return into 64-bit code, so it must not be clobbered.
	 */
	call	.Lpic_base
.Lpic_base:
	popl	%ebx

	/*
	 * Copy the boot GDT from ROM to RAM and load it from there.
	 * KVM's EPT may not allow data reads from the ROM region, so
	 * the GDT must be in RAM for the far jump to read the 64-bit
	 * CS descriptor.
	 */
	leal	(boot_gdt - .Lpic_base)(%ebx), %esi
	movl	$GDT_RAM, %edi
	movl	$((boot_gdt_end - boot_gdt) / 4), %ecx
	cld			/* also relied on by rep stosl below */
	rep movsl

	/* lgdt pseudo-descriptor: 16-bit limit at 0, 32-bit base at 2 */
	subl	$8, %esp
	movl	$GDT_RAM, 2(%esp)			/* base in RAM */
	movw	$(boot_gdt_end - boot_gdt - 1), (%esp)	/* limit */
	lgdt	(%esp)
	addl	$8, %esp

	/*
	 * Build identity-mapped page tables at PT_BASE (maps 4GB with
	 * 2MB pages). This is similar to build_pagetable() in
	 * arch/x86/cpu/i386/cpu.c (which also sets the US/A/DT bits)
	 * but must be done in assembly because page tables are needed
	 * to enter 64-bit mode and all C code in this build is compiled
	 * for 64-bit.
	 *
	 * Layout (24KB total):
	 *   PT_BASE + 0x0000	PML4 (512 entries, only [0] used)
	 *   PT_BASE + 0x1000	PDPT (512 entries, [0]..[3] used)
	 *   PT_BASE + 0x2000	PD for 0-1GB (512 * 2MB entries)
	 *   PT_BASE + 0x3000	PD for 1-2GB
	 *   PT_BASE + 0x4000	PD for 2-3GB
	 *   PT_BASE + 0x5000	PD for 3-4GB
	 */

	/* Zero 24KB */
	movl	$PT_BASE, %edi
	xorl	%eax, %eax
	movl	$(6 * 4096 / 4), %ecx
	rep stosl

	/*
	 * PML4[0] -> PDPT. Only the low 32 bits of each 64-bit entry are
	 * written; the high halves were zeroed above.
	 */
	movl	$(PT_BASE + 0x1000 + 0x03), %eax	/* Present + RW */
	movl	%eax, PT_BASE

	/* PDPT[0..3] -> four PD tables */
	movl	$(PT_BASE + 0x2000 + 0x03), %eax
	movl	%eax, (PT_BASE + 0x1000 + 0 * 8)
	addl	$0x1000, %eax
	movl	%eax, (PT_BASE + 0x1000 + 1 * 8)
	addl	$0x1000, %eax
	movl	%eax, (PT_BASE + 0x1000 + 2 * 8)
	addl	$0x1000, %eax
	movl	%eax, (PT_BASE + 0x1000 + 3 * 8)

	/*
	 * Fill the four PD tables (2048 entries total).
	 * Each entry maps a 2MB page: address | PS(bit7) | RW | P
	 *
	 * In the loop: %edi = address of the current entry, %eax = low
	 * 32 bits of the entry, %ecx = entries remaining.
	 */
	movl	$(PT_BASE + 0x2000), %edi
	movl	$0x00000083, %eax	/* 0MB, PS + RW + P */
	movl	$2048, %ecx
1:
	movl	%eax, (%edi)
	movl	$0, 4(%edi)		/* high 32 bits = 0 */
	addl	$0x200000, %eax		/* next 2MB page */
	addl	$8, %edi
	decl	%ecx
	jnz	1b

	/*
	 * Transition to 64-bit long mode. This is similar to
	 * cpu_call64() in arch/x86/cpu/i386/call64.S but uses lret
	 * instead of ljmp (which would emit a PIE-incompatible
	 * relocation). It also enables SSE which call64.S does not
	 * need to do.
	 */

	/* Disable paging (should already be off after reset) */
	movl	%cr0, %eax
	andl	$~X86_CR0_PG, %eax
	movl	%eax, %cr0

	/* Enable PAE and SSE (x86_64 gcc assumes SSE2 is available) */
	movl	%cr4, %eax
	orl	$(X86_CR4_PAE | X86_CR4_OSFXSR), %eax
	movl	%eax, %cr4

	/* Clear CR0.EM so SSE instructions do not fault */
	movl	%cr0, %eax
	andl	$~X86_CR0_EM, %eax
	movl	%eax, %cr0

	/* Point CR3 at PML4 */
	movl	$PT_BASE, %eax
	movl	%eax, %cr3

	/* Enable Long Mode in EFER */
	movl	$MSR_EFER, %ecx
	rdmsr
	btsl	$_EFER_LME, %eax
	wrmsr

	/* Enable paging -> activates long mode */
	movl	%cr0, %eax
	orl	$X86_CR0_PG, %eax
	movl	%eax, %cr0

	/*
	 * Jump to 64-bit code segment. Use lret to avoid the
	 * PIE-incompatible relocation that a direct ljmp would emit.
	 * lret pops the offset first and then the selector, so the
	 * selector is pushed first.
	 */
	leal	(start64 - .Lpic_base)(%ebx), %eax
	pushl	$(X86_GDT_ENTRY_64BIT_CS * X86_GDT_ENTRY_SIZE)
	pushl	%eax
	lret

/* ------------------------------------------------------------------ */
.code64
start64:
	/*
	 * Establish a valid 64-bit stack pointer on the existing stack.
	 * The upper half of %rsp is architecturally undefined when coming
	 * from 32-bit code, so zero-extend %esp. The 32-bit code left the
	 * stack pointer at (top - 4); round it down to 16 bytes, as the
	 * SysV AMD64 ABI requires at every call site.
	 */
	movl	%esp, %esp
	andq	$-16, %rsp

	/* Set up memory using the existing stack */
	mov	%rsp, %rdi
	call	board_init_f_alloc_reserve
	mov	%rax, %rsp

	/*
	 * Pass the base returned above as the first argument. %rdi is
	 * caller-saved, so its value after the previous call cannot be
	 * relied upon.
	 */
	mov	%rax, %rdi
	call	board_init_f_init_reserve

	xor	%rdi, %rdi		/* first argument = 0 */
	call	board_init_f
	call	board_init_f_r

	/* Should not return here */
	jmp	.

.globl board_init_f_r_trampoline64
.type board_init_f_r_trampoline64, @function
board_init_f_r_trampoline64:
	/*
	 * SDRAM has been initialised, U-Boot code has been copied into
	 * RAM, BSS has been cleared and relocation adjustments have been
	 * made. It is now time to jump into the in-RAM copy of U-Boot
	 *
	 * %rsi = Address of top of new stack
	 * %rdi = New gd
	 */

	/* Stack grows down from top of SDRAM */
	movq	%rsi, %rsp

	/* Re-enter U-Boot by calling board_init_f_r() */
	call	board_init_f_r

	/*
	 * Should not return here. Spin rather than falling through into
	 * the GDT data that follows in this section.
	 */
	jmp	.

/* ------------------------------------------------------------------ */
/* Data */
/* ------------------------------------------------------------------ */

/*
 * Boot GDT - includes valid 32-bit CS/DS entries (matching start16.S's
 * selectors 0x10 and 0x18) plus the 64-bit CS at entry 9 (selector
 * 0x48, matching U-Boot's standard GDT numbering).
 *
 * This is copied to RAM before use because KVM cannot perform the
 * implicit GDT data read from the ROM region during the far jump
 * to 64-bit mode.
 *
 * When arch_setup_gd() later loads the real GDT the CS selector (0x48)
 * remains valid.
 *
 * The table stays in the same section as the code above: _start locates
 * it with the assembly-time offset (boot_gdt - .Lpic_base).
 */
	.align 16
boot_gdt:
	/* Entry 0: NULL */
	.quad	0
	/* Entry 1: unused (matches start16.S layout) */
	.quad	0
	/* Entry 2: 32-bit code segment (selector 0x10) */
	.quad	0x00cf9b000000ffff
	/* Entry 3: 32-bit data segment (selector 0x18) */
	.quad	0x00cf93000000ffff
	/* Entries 4-8: unused */
	.fill	5, 8, 0
	/* Entry 9: 64-bit code segment (selector 0x48) */
	.quad	0x00af9a000000ffff
	/* Entry 10-11: unused (keep GDT same size as real one) */
	.quad	0
	.quad	0
boot_gdt_end: