294 | 294 | * copied there. So allocate the stack-frame on the task-stack and
295 | 295 | * switch to it before we do any copying.
296 | 296 | */
| 297 | +
| 298 | +#define CS_FROM_ENTRY_STACK (1 << 31)
| 299 | +
297 | 300 | .macro SWITCH_TO_KERNEL_STACK
298 | 301 |
299 | 302 | ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV
316 | 319 | /* Load top of task-stack into %edi */
317 | 320 | movl TSS_entry2task_stack(%edi), %edi
318 | 321 |
| 322 | + /*
| 323 | + * Clear unused upper bits of the dword containing the word-sized CS
| 324 | + * slot in pt_regs in case hardware didn't clear it for us.
| 325 | + */
| 326 | + andl $(0x0000ffff), PT_CS(%esp)
| 327 | +
| 328 | + /* Special case - entry from kernel mode via entry stack */
| 329 | + testl $SEGMENT_RPL_MASK, PT_CS(%esp)
| 330 | + jz .Lentry_from_kernel_\@
| 331 | +
319 | 332 | /* Bytes to copy */
320 | 333 | movl $PTREGS_SIZE, %ecx
321 | 334 |
329 | 342 | */
330 | 343 | addl $(4 * 4), %ecx
331 | 344 |
332 | | -.Lcopy_pt_regs_\@:
333 | 345 | #endif
| 346 | +.Lcopy_pt_regs_\@:
334 | 347 |
335 | 348 | /* Allocate frame on task-stack */
336 | 349 | subl %ecx, %edi
346 | 359 | cld
347 | 360 | rep movsl
348 | 361 |
| 362 | + jmp .Lend_\@
| 363 | +
| 364 | +.Lentry_from_kernel_\@:
| 365 | +
| 366 | + /*
| 367 | + * This handles the case when we enter the kernel from
| 368 | + * kernel-mode and %esp points to the entry-stack. When this
| 369 | + * happens we need to switch to the task-stack to run C code,
| 370 | + * but switch back to the entry-stack again when we approach
| 371 | + * iret and return to the interrupted code-path. This usually
| 372 | + * happens when we hit an exception while restoring user-space
| 373 | + * segment registers on the way back to user-space.
| 374 | + *
| 375 | + * When we switch to the task-stack here, we can't trust the
| 376 | + * contents of the entry-stack anymore, as the exception handler
| 377 | + * might be scheduled out or moved to another CPU. Therefore we
| 378 | + * copy the complete entry-stack to the task-stack and set a
| 379 | + * marker in the iret-frame (bit 31 of the CS dword) to detect
| 380 | + * what we've done on the iret path.
| 381 | + *
| 382 | + * On the iret path we copy everything back and switch to the
| 383 | + * entry-stack, so that the interrupted kernel code-path
| 384 | + * continues on the same stack it was interrupted with.
| 385 | + *
| 386 | + * Be aware that an NMI can happen anytime in this code.
| 387 | + *
| 388 | + * %esi: Entry-Stack pointer (same as %esp)
| 389 | + * %edi: Top of the task stack
| 390 | + */
| 391 | +
| 392 | + /* Calculate number of bytes on the entry stack in %ecx */
| 393 | + movl %esi, %ecx
| 394 | +
| 395 | + /* %ecx to the top of entry-stack */
| 396 | + andl $(MASK_entry_stack), %ecx
| 397 | + addl $(SIZEOF_entry_stack), %ecx
| 398 | +
| 399 | + /* Number of bytes on the entry stack to %ecx */
| 400 | + sub %esi, %ecx
| 401 | +
| 402 | + /* Mark stackframe as coming from entry stack */
| 403 | + orl $CS_FROM_ENTRY_STACK, PT_CS(%esp)
| 404 | +
| 405 | + /*
| 406 | + * %esi and %edi are unchanged, %ecx contains the number of
| 407 | + * bytes to copy. The code at .Lcopy_pt_regs_\@ will allocate
| 408 | + * the stack-frame on task-stack and copy everything over
| 409 | + */
| 410 | + jmp .Lcopy_pt_regs_\@
| 411 | +
349 | 412 | .Lend_\@:
350 | 413 | .endm
351 | 414 |
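The .Lentry_from_kernel_\@ path above is easier to follow with the pointer arithmetic written out. Below is a minimal user-space C sketch of that step, assuming the entry stack is a power-of-two-sized region so that masking the stack pointer yields its base; the constants, sample addresses and helper names are illustrative stand-ins, not the kernel's.

#include <stdint.h>
#include <stdio.h>

#define CS_FROM_ENTRY_STACK  (1u << 31)
#define SEGMENT_RPL_MASK     0x3u                         /* low two selector bits */
#define SIZEOF_ENTRY_STACK   0x1000u                      /* assumed power of two  */
#define MASK_ENTRY_STACK     (~(SIZEOF_ENTRY_STACK - 1))  /* mask down to the base */

/* RPL 0 in the sanitized CS slot means we were interrupted in kernel mode. */
static int entered_from_kernel_mode(uint32_t cs_slot)
{
        return ((cs_slot & 0x0000ffffu) & SEGMENT_RPL_MASK) == 0;
}

/* Bytes currently occupied on the entry stack, derived from the stack pointer. */
static uint32_t entry_stack_bytes_in_use(uint32_t esp)
{
        uint32_t top = (esp & MASK_ENTRY_STACK) + SIZEOF_ENTRY_STACK;
        return top - esp;                   /* the stack grows down */
}

int main(void)
{
        uint32_t esp = 0x8000f000u + SIZEOF_ENTRY_STACK - 0x60;  /* 0x60 bytes pushed */
        uint32_t cs  = 0x0010;                                   /* kernel CS, RPL 0  */

        if (entered_from_kernel_mode(cs)) {
                cs |= CS_FROM_ENTRY_STACK;  /* bit 31 is free: CS itself is 16 bits */
                printf("copy %u bytes, marked CS = %#x\n",
                       entry_stack_bytes_in_use(esp), cs);
        }
        return 0;
}

With a 4 KiB entry stack and 0x60 bytes pushed, this prints 96 bytes to copy and a marked CS of 0x80000010.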
403 | 466 | .Lend_\@:
404 | 467 | .endm
405 | 468 |
| 469 | +/*
| 470 | + * This macro handles the case when we return to kernel-mode on the iret
| 471 | + * path and have to switch back to the entry stack.
| 472 | + *
| 473 | + * See the comments below the .Lentry_from_kernel_\@ label in the
| 474 | + * SWITCH_TO_KERNEL_STACK macro for more details.
| 475 | + */
| 476 | +.macro PARANOID_EXIT_TO_KERNEL_MODE
| 477 | +
| 478 | + /*
| 479 | + * Test if we entered the kernel with the entry-stack. Most
| 480 | + * likely we did not, because this code only runs on the
| 481 | + * return-to-kernel path.
| 482 | + */
| 483 | + testl $CS_FROM_ENTRY_STACK, PT_CS(%esp)
| 484 | + jz .Lend_\@
| 485 | +
| 486 | + /* Unlikely slow-path */
| 487 | +
| 488 | + /* Clear marker from stack-frame */
| 489 | + andl $(~CS_FROM_ENTRY_STACK), PT_CS(%esp)
| 490 | +
| 491 | + /* Copy the remaining task-stack contents to entry-stack */
| 492 | + movl %esp, %esi
| 493 | + movl PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %edi
| 494 | +
| 495 | + /* Bytes on the task-stack to ecx */
| 496 | + movl PER_CPU_VAR(cpu_tss_rw + TSS_sp1), %ecx
| 497 | + subl %esi, %ecx
| 498 | +
| 499 | + /* Allocate stack-frame on entry-stack */
| 500 | + subl %ecx, %edi
| 501 | +
| 502 | + /*
| 503 | + * Save future stack-pointer, we must not switch until the
| 504 | + * copy is done, otherwise the NMI handler could destroy the
| 505 | + * contents of the task-stack we are about to copy.
| 506 | + */
| 507 | + movl %edi, %ebx
| 508 | +
| 509 | + /* Do the copy */
| 510 | + shrl $2, %ecx
| 511 | + cld
| 512 | + rep movsl
| 513 | +
| 514 | + /* Safe to switch to entry-stack now */
| 515 | + movl %ebx, %esp
| 516 | +
| 517 | +.Lend_\@:
| 518 | +.endm
406 | 519 | /*
407 | 520 | * %eax: prev task
408 | 521 | * %edx: next task
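As a companion to the entry-side sketch, here is a rough user-space C model of what PARANOID_EXIT_TO_KERNEL_MODE does on the iret path. It is only a sketch: the byte pointers stand in for %esp, TSS_sp0 (top of the entry stack) and TSS_sp1 (end of the frame on the task stack), and the function name is made up.

#include <stdint.h>
#include <stddef.h>
#include <string.h>

#define CS_FROM_ENTRY_STACK (1u << 31)

/*
 * Returns the stack pointer to continue on: unchanged in the common case,
 * or a freshly populated spot on the entry stack if the frame was marked.
 */
static unsigned char *paranoid_exit_to_kernel_mode(unsigned char *sp,
                                                   uint32_t *cs_slot,
                                                   unsigned char *task_stack_top,
                                                   unsigned char *entry_stack_top)
{
        if (!(*cs_slot & CS_FROM_ENTRY_STACK))
                return sp;                          /* did not enter via entry stack */

        *cs_slot &= ~CS_FROM_ENTRY_STACK;           /* clear the marker again */

        size_t bytes = (size_t)(task_stack_top - sp);    /* frame left on task stack */
        unsigned char *new_sp = entry_stack_top - bytes; /* room on the entry stack  */

        /* Copy first; only adopt new_sp once the data is in place. */
        memcpy(new_sp, sp, bytes);
        return new_sp;
}

Returning the new pointer instead of switching mid-copy mirrors the asm's use of %ebx: the stack pointer only moves once the copy is complete, so an NMI arriving during the copy still runs on a consistent stack.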
@@ -764,6 +877,7 @@ restore_all:
764 | 877 |
765 | 878 | restore_all_kernel:
766 | 879 | TRACE_IRQS_IRET
| 880 | + PARANOID_EXIT_TO_KERNEL_MODE
767 | 881 | RESTORE_REGS 4
768 | 882 | jmp .Lirq_return
769 | 883 |
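To tie the pieces together, here is a small self-contained demo (plain user-space C with array-backed fake stacks; every name is made up for illustration) of the round trip that this hunk completes: the entry path copies the in-use part of the entry stack to the task stack and sets bit 31 of the saved CS, and the paranoid exit path, run just before RESTORE_REGS, notices the bit, clears it and copies the frame back to the entry stack.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define CS_FROM_ENTRY_STACK (1u << 31)

int main(void)
{
        unsigned char entry_stack[256], task_stack[1024];
        unsigned char *entry_top = entry_stack + sizeof(entry_stack);
        unsigned char *task_top  = task_stack + sizeof(task_stack);

        /* Pretend 96 bytes (iret frame plus saved registers) sit on the entry stack. */
        size_t used = 96;
        unsigned char *esp = entry_top - used;
        memset(esp, 0xAB, used);
        uint32_t cs = 0x0010;                       /* saved kernel-mode CS */

        /* Entry path: copy the frame to the task stack and mark it. */
        unsigned char *task_sp = task_top - used;
        memcpy(task_sp, esp, used);
        cs |= CS_FROM_ENTRY_STACK;

        /* Exit path: detect the mark, clear it, copy back, switch stacks. */
        if (cs & CS_FROM_ENTRY_STACK) {
                cs &= ~CS_FROM_ENTRY_STACK;
                size_t bytes = (size_t)(task_top - task_sp);
                unsigned char *back = entry_top - bytes;
                memcpy(back, task_sp, bytes);
                esp = back;
        }

        printf("cs = %#x, frame restored on entry stack: %s\n", cs,
               memcmp(esp, task_sp, used) == 0 ? "yes" : "no");
        return 0;
}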