Eric W. Biederman | 5234f5e | 2005-06-25 14:58:02 -0700 | [diff] [blame] | 1 | /* |
| 2 | * relocate_kernel.S - put the kernel image in place to boot |
| 3 | * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com> |
| 4 | * |
| 5 | * This source code is licensed under the GNU General Public License, |
| 6 | * Version 2. See the file COPYING for more details. |
| 7 | */ |
| 8 | |
| 9 | #include <linux/linkage.h> |
Jeremy Fitzhardinge | 0341c14 | 2009-02-13 11:14:01 -0800 | [diff] [blame] | 10 | #include <asm/page_types.h> |
Magnus Damm | 4bfaaef | 2006-09-26 10:52:38 +0200 | [diff] [blame] | 11 | #include <asm/kexec.h> |
gorcunov@gmail.com | fd3af53 | 2008-03-23 00:00:08 +0300 | [diff] [blame] | 12 | #include <asm/processor-flags.h> |
Jeremy Fitzhardinge | 0341c14 | 2009-02-13 11:14:01 -0800 | [diff] [blame] | 13 | #include <asm/pgtable_types.h> |
Eric W. Biederman | 5234f5e | 2005-06-25 14:58:02 -0700 | [diff] [blame] | 14 | |
Magnus Damm | 4bfaaef | 2006-09-26 10:52:38 +0200 | [diff] [blame] | 15 | /* |
| 16 | * Must be relocatable PIC code callable as a C function |
| 17 | */ |
| 18 | |
/*
 * Byte offset of slot x in an array of 8-byte entries; used below to index
 * page_list. The argument is parenthesized so that an expression argument
 * is shifted as a whole.
 */
#define PTR(x) ((x) << 3)
/* Not referenced in the visible part of this file. */
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)

/*
 * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
 * ~ control_page + PAGE_SIZE are used as data storage and stack for
 * jumping back
 */
#define DATA(offset)		(KEXEC_CONTROL_CODE_MAX_SIZE+(offset))

/* Minimal CPU state: one 8-byte slot each, written by relocate_kernel */
#define RSP			DATA(0x0)
#define CR0			DATA(0x8)
#define CR3			DATA(0x10)
#define CR4			DATA(0x18)

/* other data: physical addresses needed again on the jump-back path */
#define CP_PA_TABLE_PAGE	DATA(0x20)
#define CP_PA_SWAP_PAGE		DATA(0x28)
#define CP_PA_BACKUP_PAGES_MAP	DATA(0x30)
| 39 | |
/*
 * relocate_kernel - entry point of the kexec control code.
 *
 * Saves the callee-saved registers, the flags, the stack pointer and
 * %cr0/%cr3/%cr4, switches to the identity mapped page tables and
 * continues at identity_mapped inside the physical control page.
 *
 * In:
 *   %rdi  indirection_page
 *   %rsi  page_list
 *   %rdx  start address
 *   %rcx  preserve_context
 *
 * Left live for identity_mapped:
 *   %rdi, %rdx, %rcx  unchanged from entry
 *   %r9   physical address of the page table page
 *   %r10  physical address of the swap page
 *   %rsp  end of the physical control page
 */
	.text
	.align PAGE_SIZE
	.code64
	.globl relocate_kernel
relocate_kernel:
	/* Save the CPU context, used for jumping back */
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	pushfq

	/* %r11 = virtual address of the control page, base for the DATA() slots */
	movq	PTR(VA_CONTROL_PAGE)(%rsi), %r11
	movq	%rsp, RSP(%r11)
	movq	%cr0, %rax
	movq	%rax, CR0(%r11)
	movq	%cr3, %rax
	movq	%rax, CR3(%r11)
	movq	%cr4, %rax
	movq	%rax, CR4(%r11)

	/* zero out flags, and disable interrupts */
	pushq	$0
	popfq

	/*
	 * get physical address of control page now
	 * this is impossible after page table switch
	 */
	movq	PTR(PA_CONTROL_PAGE)(%rsi), %r8

	/* get physical address of page table now too */
	movq	PTR(PA_TABLE_PAGE)(%rsi), %r9

	/* get physical address of swap page now */
	movq	PTR(PA_SWAP_PAGE)(%rsi), %r10

	/* save some information for jumping back */
	movq	%r9, CP_PA_TABLE_PAGE(%r11)
	movq	%r10, CP_PA_SWAP_PAGE(%r11)
	movq	%rdi, CP_PA_BACKUP_PAGES_MAP(%r11)

	/* Switch to the identity mapped page tables */
	movq	%r9, %cr3

	/* setup a new stack at the end of the physical control page */
	lea	PAGE_SIZE(%r8), %rsp

	/*
	 * jump to identity mapped page: %r8 becomes the control page address
	 * plus the offset of identity_mapped from relocate_kernel, and ret
	 * pops it as the branch target. This presumes a copy of this code
	 * sits at the start of the control page; the copy is not made in the
	 * visible part of this file - confirm against the caller.
	 */
	addq	$(identity_mapped - relocate_kernel), %r8
	pushq	%r8
	ret
| 101 | |
identity_mapped:
	/*
	 * Second stage, entered from relocate_kernel through a computed
	 * address inside the physical control page, with %cr3 already set
	 * to the identity mapped page tables.
	 *
	 * Live on entry:
	 *   %rdi  indirection_page
	 *   %rdx  start address
	 *   %rcx  preserve_context
	 *   %r9   physical address of the page table page
	 *   %r10  physical address of the swap page
	 *   %rsp  end of the physical control page
	 */

	/* set return address to 0 if not preserving context */
	pushq	$0
	/* store the start address on the stack */
	pushq	%rdx

	/*
	 * Set cr0 to a known state:
	 *  - Paging enabled
	 *  - Alignment check disabled
	 *  - Write protect disabled
	 *  - No task switch
	 *  - Don't do FP software emulation.
	 *  - Protected mode enabled
	 * The orl writes %eax, so bits 63:32 of %rax are zero when the
	 * value is stored to %cr0.
	 */
	movq	%cr0, %rax
	andq	$~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax
	orl	$(X86_CR0_PG | X86_CR0_PE), %eax
	movq	%rax, %cr0

	/*
	 * Set cr4 to a known state:
	 *  - physical address extension enabled
	 * The register is loaded from the constant alone, so every other
	 * cr4 bit ends up clear.
	 */
	movq	$X86_CR4_PAE, %rax
	movq	%rax, %cr4

	jmp	.Lcr_updated
.Lcr_updated:

	/* Flush the TLB (needed?) */
	movq	%r9, %cr3

	/* swap_pages overwrites %rcx, so keep preserve_context in %r11 */
	movq	%rcx, %r11
	call	swap_pages

	/*
	 * To be certain of avoiding problems with self-modifying code
	 * I need to execute a serializing instruction here.
	 * So I flush the TLB by reloading %cr3 here, it's handy,
	 * and not processor dependent.
	 */
	movq	%cr3, %rax
	movq	%rax, %cr3

	/* non-zero preserve_context selects the jump-back capable path */
	testq	%r11, %r11
	jnz	.Lpreserve_context

	/*
	 * set all of the registers to known values
	 * leave %rsp alone
	 */
	xorl	%eax, %eax
	xorl	%ebx, %ebx
	xorl	%ecx, %ecx
	xorl	%edx, %edx
	xorl	%esi, %esi
	xorl	%edi, %edi
	xorl	%ebp, %ebp
	xorl	%r8d, %r8d
	xorl	%r9d, %r9d
	xorl	%r10d, %r10d
	xorl	%r11d, %r11d
	xorl	%r12d, %r12d
	xorl	%r13d, %r13d
	xorl	%r14d, %r14d
	xorl	%r15d, %r15d

	/* pops the start address pushed above; the 0 below it stays as return address */
	ret

.Lpreserve_context:
	/* %rdx = start address; call it on a stack at the end of the swap page */
	popq	%rdx
	leaq	PAGE_SIZE(%r10), %rsp
	call	*%rdx

	/* get the re-entry point of the peer system */
	movq	0(%rsp), %rbp
	/*
	 * call/pop yields the run-time address of .Lcurrent_addr; subtracting
	 * its offset from relocate_kernel gives the address the code was
	 * loaded at, which is also the base for the DATA() slots.
	 */
	call	.Lcurrent_addr
.Lcurrent_addr:
	popq	%r8
	subq	$(.Lcurrent_addr - relocate_kernel), %r8
	/* reload the values relocate_kernel stored for jumping back */
	movq	CP_PA_SWAP_PAGE(%r8), %r10
	movq	CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
	movq	CP_PA_TABLE_PAGE(%r8), %rax
	movq	%rax, %cr3
	lea	PAGE_SIZE(%r8), %rsp
	call	swap_pages
	/* continue at the absolute address of virtual_mapped, with %r8 and %rbp live */
	movq	$virtual_mapped, %rax
	pushq	%rax
	ret
| 192 | |
virtual_mapped:
	/*
	 * Last step of the jump-back path, reached through the absolute
	 * address pushed by identity_mapped. Restores the state saved at
	 * the top of relocate_kernel and returns to its caller.
	 *
	 * In:
	 *   %r8   base address for the RSP/CR0/CR3/CR4 data slots
	 *   %rbp  re-entry point of the peer system
	 * Out:
	 *   %rax  that re-entry point
	 */
	movq	RSP(%r8), %rsp
	movq	CR4(%r8), %rax
	movq	%rax, %cr4
	/* both remaining values are fetched before %cr3 is written; nothing is loaded through %r8 afterwards */
	movq	CR3(%r8), %rax
	movq	CR0(%r8), %r8
	movq	%rax, %cr3
	movq	%r8, %cr0
	movq	%rbp, %rax

	/* undo the pushes made at the top of relocate_kernel, in reverse order */
	popfq
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbp
	popq	%rbx
	ret
Eric W. Biederman | 5234f5e | 2005-06-25 14:58:02 -0700 | [diff] [blame] | 211 | |
/*
 * swap_pages - Do the copies: walk the indirection list and exchange the
 * contents of every source page with its destination page, using the swap
 * page as scratch space.
 *
 * In:
 *   %rdi  first list entry (indirection_page)
 *   %r10  address of the swap page
 *
 * Entry flag bits, as tested below:
 *   0x1 destination page, 0x2 indirection page, 0x4 done, 0x8 source page
 * The bits kept by the mask 0xfffffffffffff000 are the page address.
 *
 * Overwrites %rax, %rbx, %rcx, %rdx, %rsi, %rdi and the flags.
 */
swap_pages:
	movq	%rdi, %rcx	/* Put the indirection_page entry in %rcx */
	xorl	%edi, %edi
	xorl	%esi, %esi
	/*
	 * NOTE(review): %rbx is only loaded by an indirection entry, so the
	 * first entry is presumably always one - confirm against the caller.
	 */
	jmp	.Lclassify

.Lnext_entry:	/* top, read another word for the indirection page */

	movq	(%rbx), %rcx
	addq	$8, %rbx
.Lclassify:
	testq	$0x1, %rcx	/* is it a destination page? */
	jz	.Lnot_dest
	movq	%rcx, %rdi
	andq	$0xfffffffffffff000, %rdi
	jmp	.Lnext_entry
.Lnot_dest:
	testq	$0x2, %rcx	/* is it an indirection page? */
	jz	.Lnot_indirection
	movq	%rcx, %rbx
	andq	$0xfffffffffffff000, %rbx
	jmp	.Lnext_entry
.Lnot_indirection:
	testq	$0x4, %rcx	/* is it the done indicator? */
	jnz	.Ldone
	testq	$0x8, %rcx	/* is it the source indicator? */
	jz	.Lnext_entry	/* Ignore it otherwise */
	movq	%rcx, %rsi	/* For every source page do a copy */
	andq	$0xfffffffffffff000, %rsi

	movq	%rdi, %rdx	/* keep the destination page address in %rdx */
	movq	%rsi, %rax	/* keep the source page address in %rax */

	/* copy source page to swap page; 512 quadwords are 4096 bytes */
	movq	%r10, %rdi
	movq	$512, %rcx
	rep ; movsq

	/* copy destination page to source page */
	movq	%rax, %rdi
	movq	%rdx, %rsi
	movq	$512, %rcx
	rep ; movsq

	/* copy swap page to destination page */
	movq	%rdx, %rdi
	movq	%r10, %rsi
	movq	$512, %rcx
	rep ; movsq

	/* leave %rsi at the end of the source page for the next entry */
	lea	PAGE_SIZE(%rax), %rsi
	jmp	.Lnext_entry
.Ldone:
	ret
Huang Ying | fee7b0d | 2009-03-10 10:57:16 +0800 | [diff] [blame] | 266 | |
/*
 * Exported symbol whose value is the number of bytes between relocate_kernel
 * and this point, i.e. the size of the code above.
 */
	.globl kexec_control_code_size
.set kexec_control_code_size, . - relocate_kernel