ld(1)

ld [opts] files...
    -T <script>        use <script> as linker script
    --trace            report each file the linker touches
    --start-group archives --end-group
                       search archives repeatedly until no new
                       undefined references are created
                       (e.g. helpful with a list of static libraries)

Linker Script

Output sections are defined as follows (for the full description, see the ld documentation on output sections and input sections).

section_name [vaddr] : [AT(paddr)] {
    file_pattern (section_pattern)
}

The following gives an example of an output section with two input section rules.

/* Output section .foo, built from two input section rules */
.foo : {
    /* the input section named .foo from the file abc.o */
    abc.o (.foo)
    /* input sections matching .foo.* from every file matching *.o */
    *.o (.foo.*)
}

Example: virtual vs physical (load) address

Sometimes code is initially stored at a different location than the one it runs from. For example, in embedded systems code may initially reside in ROM, and startup code copies a section with writable data into RAM. Code accessing the writable data then accesses the copy in RAM.

In this case we need different addresses for the same data.

  • The virtual or runtime address: this is the address used when the linker resolves accesses to the data. Hence, this is the address the data will have when the code is running.
  • The physical or load address: this is the address the data is stored at initially. Startup code typically copies the initial values from the physical to the virtual address.

The following shows an example linker script which uses virtual and physical addresses. The full source files can be found here.

/* Linker script that places sections at explicit addresses (no MEMORY command). */
/* Output is a 64-bit x86-64 ELF file whose entry point is the symbol _entry. */
OUTPUT_FORMAT(elf64-x86-64)
ENTRY(_entry)

SECTIONS {
    /* Set the initial location counter (vaddr) */
    . = 0x00800000;

    /* Create .text output section at current vaddr */
    /* It collects all input sections whose name starts with .text */
    .text : {
        *(.text*)
    }

    /* Placing .text advanced the location counter by the size of .text */
    ASSERT(. == 0x00800000 + SIZEOF(.text), "inc loc counter automatically")

    /* Create .data section at location counter aligned to the next 0x100 (vaddr) */
    /* Set the load address to 0x00100000 (paddr) */
    .data ALIGN(0x100) : AT(0x00100000) {
        /* Record the start of .data twice: as runtime address (vaddr) */
        /* and as load address (paddr); HIDDEN keeps both symbols non-exported */
        HIDDEN(_data_vaddr = .);
        HIDDEN(_data_paddr = LOADADDR(.data));
        *(.data*)
    }

    /* Create .rodata with explicit vaddr */
    /* Re-adjust the paddr location counter: AT(ADDR(.rodata)) sets paddr == vaddr */
    .rodata 0x00804000 : AT(ADDR(.rodata)) {
        *(.rodata*)
    }

    /* Placing .rodata advanced the location counter by the size of .rodata */
    ASSERT(. == 0x00804000 + SIZEOF(.rodata), "inc loc counter automatically")

    /* Create .stack at the location counter aligned to the next 0x1000 (vaddr) */
    /* It takes no input sections; it only reserves space */
    .stack ALIGN (0x1000) : {
        /* Reserve 0x1000 bytes by advancing the location counter */
        . += 0x1000;
        /* _stack_top is the address just past the reserved area */
        HIDDEN(_stack_top = .);
    }

    /* Drop every remaining input section whose name starts with a dot */
    /DISCARD/ : {
        *(.*)
    }
}

/* Some example assertions */
/* ASSERT stops the link with the given message if its expression is zero */
ASSERT(ADDR(.data) != LOADADDR(.data), "DATA vaddr and paddr must be different")
ASSERT(SIZEOF(.stack) == 0x1000, "STACK section must be 0x1000")

We can use the following assembly snippet to explore the linker script.

/* data.S: small x86-64 test input (GNU as, AT&T syntax) for the linker script. */
/* It references symbols in .data and .rodata so the resolved addresses are visible. */

/* Code section: "ax" = allocatable + executable, @progbits = holds data */
.section .text, "ax", @progbits
.global _entry
_entry:
    mov $_stack_top, %rsp       /* rsp = _stack_top, a symbol defined by the linker script */
    mov $asm_array, %rax        /* rax = address of asm_array, as resolved by the linker */
    mov (asm_len), %eax         /* eax = 32-bit value stored at asm_len (overwrites rax) */

    hlt
    jmp _entry                  /* only reached if execution continues past hlt */

/* Writable data: "aw" = allocatable + writable */
.section .data.asm, "aw", @progbits
/* asm_array: 4 distinct words followed by 4 filler words, 8 * 4 = 0x20 bytes */
asm_array:
    .4byte 0xa
    .4byte 0xb
    .4byte 0xc
    .4byte 0xd
/* .rept/.endr repeats the enclosed directive 4 times */
.rept 4
    .4byte 0xff
.endr

/* Read-only data: "a" = allocatable only */
.section .rodata.asm, "a", @progbits
/* NOTE(review): presumably the element count of asm_array (8 words) -- confirm */
asm_len:
    .4byte 8

gcc -c data.S && ld -o link-nomem -T link-nomem.ld data.o

The elf load segments show the difference in physical and virtual address for the segment containing the .data section.

> readelf -W -l link-nomem
# There are 4 program headers, starting at offset 64
#
# Program Headers:
#   Type   Offset   VirtAddr           PhysAddr           FileSiz  MemSiz   Flg Align
#   LOAD   0x001100 0x0000000000800100 0x0000000000100000 0x000020 0x000020 RW  0x1000
#   LOAD   0x002000 0x0000000000800000 0x0000000000800000 0x000018 0x000018 R E 0x1000
#   LOAD   0x003000 0x0000000000804000 0x0000000000804000 0x000004 0x000004 R   0x1000
#   LOAD   0x000000 0x0000000000805000 0x0000000000805000 0x000000 0x001000 RW  0x1000
#
#  Section to Segment mapping:
#   Segment Sections...
#   00     .data
#   01     .text
#   02     .rodata
#   03     .stack

Startup code could copy data from _data_paddr to _data_vaddr.

> nm link-nomem
# 0000000000800100 d asm_array
# 0000000000804000 r asm_len
# 0000000000100000 a _data_paddr
# 0000000000800100 d _data_vaddr
# 0000000000800000 T _entry
# 0000000000806000 b _stack_top

The linker resolves symbols to their virtual addresses; this can be seen in the access to the asm_array variable.

> objdump -d link-nomem
# Disassembly of section .text:
#
# 0000000000800000 <_entry>:
#   800000:	48 c7 c4 00 60 80 00 	mov    $0x806000,%rsp
#   800007:	48 c7 c0 00 01 80 00 	mov    $0x800100,%rax   ;; mov $asm_array, %rax
#   80000e:	8b 04 25 00 40 80 00 	mov    0x804000,%eax
#   800015:	f4                   	hlt
#   800016:	eb e8                	jmp    800000 <_entry>

The following linker script shows an example with the MEMORY command.

/* Linker script that places sections through named memory regions (MEMORY command). */
/* Output is a 64-bit x86-64 ELF file whose entry point is the symbol _entry. */
OUTPUT_FORMAT(elf64-x86-64)
ENTRY(_entry)

/* Two regions of 0x4000 bytes each: ROM at 0x00100000 and RAM at 0x00800000 */
MEMORY {
    ROM : ORIGIN = 0x00100000, LENGTH = 0x4000
    RAM : ORIGIN = 0x00800000, LENGTH = 0x4000
}

SECTIONS {
    /* Create .text output section at ROM (vaddr) */
    .text : {
        *(.text*)
    } > ROM

    /* .text is the first section in ROM, so the location counter now sits */
    /* at the start of ROM plus the size of .text */
    ASSERT(. == ORIGIN(ROM) + SIZEOF(.text), "inc loc counter automatically")

    /* Create .data output section at RAM (vaddr) */
    /* Set load addr to ROM, right after .text (paddr) */
    .data : {
        /* Record the start of .data twice: as runtime address (vaddr) */
        /* and as load address (paddr); HIDDEN keeps both symbols non-exported */
        HIDDEN(_data_vaddr = .);
        HIDDEN(_data_paddr = LOADADDR(.data));
        *(.data*)
    } > RAM AT > ROM

    /* Append .rodata output section at ROM (vaddr) */
    .rodata : {
        *(.rodata*)
    } > ROM

    /* Append .stack output section at RAM (vaddr) aligned up to next 0x1000 */
    /* It takes no input sections; it only reserves space */
    .stack : ALIGN (0x1000) {
        /* Reserve 0x1000 bytes by advancing the location counter */
        . += 0x1000;
        /* _stack_top is the address just past the reserved area */
        HIDDEN(_stack_top = .);
    } > RAM

    /* Drop every remaining input section whose name starts with a dot */
    /DISCARD/ : {
        *(.*)
    }
}

/* Some example assertions */
/* ASSERT stops the link with the given message if its expression is zero */

/* .data runs from RAM but is loaded from ROM */
ASSERT(ADDR(.data) != LOADADDR(.data), "DATA vaddr and paddr must be different")
/* .rodata is placed in ROM only, so it runs from where it is loaded */
ASSERT(ADDR(.rodata) == LOADADDR(.rodata), "RODATA vaddr and paddr must be equal")
/* .stack starts at RAM + 0x1000, i.e. .data was aligned up to the next 0x1000 */
ASSERT(ADDR(.stack) == ORIGIN(RAM) + 0x1000, "STACK section must be aligned to 0x1000")
/* .stack contains exactly the 0x1000 reserved bytes */
ASSERT(SIZEOF(.stack) == 0x1000, "STACK section must be 0x1000")

References