On Linux, phys_startup_64 is not just any symbol: it is the entry point of the kernel image.

On x86-64, the entry point of the ‘vmlinux’ ELF file is the symbol ‘phys_startup_64’, which is defined in the linker script as:

# file: arch/x86/kernel/vmlinux.lds.S
 34 #ifdef CONFIG_X86_32
 35 OUTPUT_ARCH(i386)
 36 ENTRY(phys_startup_32)
 37 jiffies = jiffies_64;
 38 #else
 39 OUTPUT_ARCH(i386:x86-64)
 40 ENTRY(phys_startup_64)
 41 jiffies_64 = jiffies;
 42 #endif
# file: arch/x86/kernel/vmlinux.lds.S
 82 SECTIONS
 83 {
 84 #ifdef CONFIG_X86_32
 85         . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
 86         phys_startup_32 = startup_32 - LOAD_OFFSET;
 87 #else
 88         . = __START_KERNEL;
 89         phys_startup_64 = startup_64 - LOAD_OFFSET;
 90 #endif

https://zhuanlan.zhihu.com/p/94524823

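  • startup_64 is the 64-bit entry point for Linux (its virtual address; phys_startup_64 = startup_64 - LOAD_OFFSET is the corresponding physical address)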

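swapper_pg_dir contains a mapping of all physical pages at virtual addresses 0xc0000000 to 0xc0000000 + end_mem. On 32-bit x86 without PAE each page-directory entry covers 4 MiB, and 768 × 4 MiB = 3 GiB = 0xc0000000, so the first 768 entries in swapper_pg_dir are 0's, and then there are 4 or more that point to kernel page tables.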

  • What “swapper_pg_dir” is all about (on linux kernel):
    • The kernel sees memory as if based at 0xC0000000. Any memory allocation, pointer or global, is located between 0xC0000000 and 0xFFFFFFFF. However, for HW controllers, such as the MMU or any co-processor, the memory window is probably based at 0x00000000.
    • So, when loading a pointer to a table or descriptor into a HW engine, it must be based at 0x00000000.
    • When building the Linux kernel, the linker assigns addresses in “kernel space” (virtual addresses in the range used by the kernel) to all kernel symbols. So $swapper_pg_dir will evaluate to a virtual address, which is only usable AFTER the page tables are set up. $swapper_pg_dir - 0xc0000000 is the corresponding physical address.
    • The kernel is linked at address 0xc0000000. Each process in user space has its own virtual memory map, so different processes see different memory pages when pointing to the same address (1MB for example). However, when executing kernel code (for example during a system call), all processes execute it in the same virtual (but linear) address space, starting at 0xc0000000. The value 0xc0000000 is historical and relies on factors such as the 32-bit address space, memory holes in PC systems and the system memory sizes that were common back then.
/*
 * Determine the kernel page directory for a Linux target from symbol
 * addresses obtained through linux_symbol_to_address().
 *
 * The kernel entry point is resolved both physically (phys_startup_*) and
 * virtually (startup_*); their difference, when usable, is kept as the
 * "boundary". A 32-bit style initialisation is attempted when the
 * swapper_pg_dir symbol resolves, otherwise a 64-bit one.
 *
 * Returns the status of the last initialisation step attempted, or
 * VMI_FAILURE on the early exits.
 */
static status_t linux_filemode_init(vmi_instance_t vmi)
{
    status_t rc;
    addr_t swapper_pg_dir = 0, kernel_pgt = 0;
    addr_t boundary = 0, phys_start = 0, virt_start = 0;
 
    /*
     * Pick the entry-point symbol pair matching the page mode. The return
     * values of these lookups are not checked here; a failed lookup is
     * detected later by the address still being 0.
     */
    switch (vmi->page_mode) {
        case VMI_PM_AARCH64:
        case VMI_PM_IA32E:
            /*
             * phys_startup_64 = physical address of the Linux entry point
             * startup_64      = virtual address of the Linux entry point
             */
            linux_symbol_to_address(vmi, "phys_startup_64", NULL, &phys_start);
            linux_symbol_to_address(vmi, "startup_64", NULL, &virt_start);
            break;
        case VMI_PM_AARCH32:
        case VMI_PM_LEGACY:
        case VMI_PM_PAE:
            linux_symbol_to_address(vmi, "phys_startup_32", NULL, &phys_start);
            linux_symbol_to_address(vmi, "startup_32", NULL, &virt_start);
            break;
        case VMI_PM_UNKNOWN:
            /* Page mode not known yet: try the 64-bit symbols first. */
            linux_symbol_to_address(vmi, "phys_startup_64", NULL, &phys_start);
            linux_symbol_to_address(vmi, "startup_64", NULL, &virt_start);
 
            if (phys_start && virt_start) break;
            /* Discard a partial result before falling back to the 32-bit symbols. */
            phys_start = virt_start = 0;
 
            linux_symbol_to_address(vmi, "phys_startup_32", NULL, &phys_start);
            linux_symbol_to_address(vmi, "startup_32", NULL, &virt_start);
            break;
    }
 
    /* NOTE(review): canonical_addr() presumably normalises the address to
     * canonical form; its definition is not visible here - confirm. */
    virt_start = canonical_addr(virt_start);
		
   

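Here it checks that the kernel's physical entry address (phys_start) is lower than its virtual entry address (virt_start). For a normally linked kernel this condition holds, because the kernel is linked at a high virtual address but loaded low in physical memory (phys_startup_64 = startup_64 - LOAD_OFFSET). The difference between the two addresses is the “boundary”; if the check fails, the boundary stays 0.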

https://linux-kernel-labs.github.io/refs/heads/master/labs/memory_mapping.html

 /*
  * Only derive a boundary when both entry-point addresses were resolved
  * (non-zero) and the physical one lies below the virtual one; otherwise
  * boundary keeps its initial value of 0.
  */
 if (phys_start && virt_start && phys_start < virt_start) {
        boundary = virt_start - phys_start;
        dbprint(VMI_DEBUG_MISC, "--got kernel boundary (0x%.16"PRIx64").\n", boundary);
    }

swapper_pg_dir and the boundary will be used in a moment, inside the linux_filemode_32bit_init() call, to calculate vmi->kpgd = swapper_pg_dir - boundary.

Note that linux_filemode_32bit_init() is only tried if the symbol swapper_pg_dir is available.

 
		/*
		 * swapper_pg_dir contains a mapping for all physical pages from
		 * 0xc0000000 to 0xc0000000 + end_mem, so the first 768 entries in
		 * swapper_pg_dir are 0's, and then there are 4 or more that point
		 * to kernel page tables.
		 *
		 * If the symbol resolves, only the 32-bit initialisation is
		 * attempted: every path inside the branch below ends in
		 * "goto done".
		 */
    rc = linux_symbol_to_address(vmi, "swapper_pg_dir", NULL, &swapper_pg_dir);
 
    if (VMI_SUCCESS == rc) {
 
        dbprint(VMI_DEBUG_MISC, "--got vaddr for swapper_pg_dir (0x%.16"PRIx64").\n",
                swapper_pg_dir);
			
        swapper_pg_dir = canonical_addr(swapper_pg_dir);
 
        /* We don't know if VMI_PM_LEGACY, VMI_PM_PAE or VMI_PM_AARCH32 yet
         * so we do some heuristics below. */
        /* First choice: the boundary derived from the entry-point symbols. */
        if (boundary) {
            rc = linux_filemode_32bit_init(vmi, swapper_pg_dir, boundary,
                                           phys_start, virt_start);
            if (VMI_SUCCESS == rc)
                goto done;
        }
 
        /*
         * So we have a swapper but don't know the physical page of it.
         * We will make some educated guesses now.
         *
         * Each guess passes swapper_pg_dir itself as the virtual address and
         * swapper_pg_dir - boundary as the matching physical address.
         * NOTE(review): the three constants presumably correspond to the
         * common 3G/1G, 2G/2G and 1G/3G user/kernel splits - confirm.
         */
        boundary = 0xC0000000;
        dbprint(VMI_DEBUG_MISC, "--trying boundary 0x%.16"PRIx64".\n",
                boundary);
        rc = linux_filemode_32bit_init(vmi, swapper_pg_dir, boundary,
                                       swapper_pg_dir-boundary, swapper_pg_dir);
        if (VMI_SUCCESS == rc) {
            goto done;
        }
 
        boundary = 0x80000000;
        dbprint(VMI_DEBUG_MISC, "--trying boundary 0x%.16"PRIx64".\n",
                boundary);
        rc = linux_filemode_32bit_init(vmi, swapper_pg_dir, boundary,
                                       swapper_pg_dir-boundary, swapper_pg_dir);
        if (VMI_SUCCESS == rc) {
            goto done;
        }
 
        /* Last guess: its result is returned whether it succeeded or not. */
        boundary = 0x40000000;
        dbprint(VMI_DEBUG_MISC, "--trying boundary 0x%.16"PRIx64".\n",
                boundary);
        rc = linux_filemode_32bit_init(vmi, swapper_pg_dir, boundary,
                                       swapper_pg_dir-boundary, swapper_pg_dir);
        goto done;
    }
 

linux_filemode_32bit_init()

Let’s see if we are lucky in 64-bit mode:

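In the 64-bit Linux kernel source, **swapper_pg_dir** is actually a #define for the symbol init_level4_pgt, so the symbol to look up is init_level4_pgt (the code below also falls back to init_top_pgt):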

 
(...)
extern pgd_t init_level4_pgt[];
 
#define swapper_pg_dir init_level4_pgt
(...)

https://elixir.bootlin.com/linux/v2.6.33/source/arch/x86/include/asm/pgtable_64.h#L22

/* Try 64-bit init */
    /* The top-level kernel page table is looked up under two symbol names,
     * init_level4_pgt first and init_top_pgt as the fallback. */
    rc = linux_symbol_to_address(vmi, "init_level4_pgt", NULL, &kernel_pgt);
    if ( rc == VMI_FAILURE )
        rc = linux_symbol_to_address(vmi, "init_top_pgt", NULL, &kernel_pgt);
 
    if (rc == VMI_FAILURE)
        goto done;
 
    dbprint(VMI_DEBUG_MISC, "--got vaddr for kernel pagetable (0x%.16"PRIx64").\n", kernel_pgt);
 
    kernel_pgt = canonical_addr(kernel_pgt);
 
    /* Without a boundary the physical address of the page table
     * (kernel_pgt - boundary) cannot be computed. */
    if (!boundary)
        return VMI_FAILURE;
 
    vmi->page_mode = VMI_PM_IA32E;
 
    rc = arch_init(vmi);
    if (VMI_FAILURE == rc)
        return VMI_FAILURE;
 
    addr_t test = 0;
 
    /* First, look for kernel in likely place */
    /* Sanity check: translating the virtual entry point through the candidate
     * page table must yield the physical entry point. */
    rc = vmi_pagetable_lookup(vmi, kernel_pgt - boundary, virt_start, &test);
    if ( VMI_SUCCESS == rc &&
            test == phys_start) {
        vmi->kpgd = kernel_pgt - boundary;
        rc = verify_linux_paging (vmi);
    }
 
    /* If that didn't work, brute force across possible KPDB locations and virtual kernel bases */
    /* NOTE(review): if the lookup above succeeds but test != phys_start, rc is
     * still VMI_SUCCESS, so the brute force is skipped and this function
     * returns success without assigning vmi->kpgd here - confirm this is
     * intended. */
    if (VMI_FAILURE == rc) {
        rc = brute_force_find_kern_mem(vmi);
    }
 
done:
    return rc;
}

brute_force_find_kern_mem()


🌱 Back to Garden

2 items under this folder.