* [PATCH 1/6] split linker out of elf module
2013-05-18 15:05 wip-linker: Refactor ELF linker and loader Andy Wingo
@ 2013-05-18 15:05 ` Andy Wingo
2013-05-22 20:39 ` Ludovic Courtès
2013-05-18 15:05 ` [PATCH 2/6] ELF refactor and consequent linker simplifications Andy Wingo
` (5 subsequent siblings)
6 siblings, 1 reply; 24+ messages in thread
From: Andy Wingo @ 2013-05-18 15:05 UTC (permalink / raw)
To: guile-devel; +Cc: Andy Wingo
* module/Makefile.am:
* module/system/vm/linker.scm: New file, split out of (system vm elf).
* module/system/vm/elf.scm: Remove linking capabilities.
* module/language/objcode/elf.scm: Adapt caller to use (system vm
linker).
* test-suite/tests/linker.test: New test.
---
module/Makefile.am | 1 +
module/language/objcode/elf.scm | 29 +--
module/system/vm/elf.scm | 387 ++--------------------------------
module/system/vm/linker.scm | 442 +++++++++++++++++++++++++++++++++++++++
test-suite/tests/linker.test | 86 ++++++++
5 files changed, 562 insertions(+), 383 deletions(-)
create mode 100644 module/system/vm/linker.scm
create mode 100644 test-suite/tests/linker.test
diff --git a/module/Makefile.am b/module/Makefile.am
index 4daf7cf..0601a05 100644
--- a/module/Makefile.am
+++ b/module/Makefile.am
@@ -348,6 +348,7 @@ SYSTEM_SOURCES = \
system/vm/inspect.scm \
system/vm/coverage.scm \
system/vm/elf.scm \
+ system/vm/linker.scm \
system/vm/frame.scm \
system/vm/instruction.scm \
system/vm/objcode.scm \
diff --git a/module/language/objcode/elf.scm b/module/language/objcode/elf.scm
index 9654c08..1edfdcf 100644
--- a/module/language/objcode/elf.scm
+++ b/module/language/objcode/elf.scm
@@ -1,6 +1,6 @@
;;; Embedding bytecode in ELF
-;; Copyright (C) 2012 Free Software Foundation, Inc.
+;; Copyright (C) 2012, 2013 Free Software Foundation, Inc.
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
@@ -30,24 +30,25 @@
#:use-module (rnrs bytevectors)
#:use-module (ice-9 binary-ports)
#:use-module (system vm elf)
+ #:use-module (system vm linker)
#:export (write-objcode))
(define (bytecode->elf bv)
- (let ((string-table (make-elf-string-table)))
+ (let ((string-table (make-string-table)))
(define (intern-string! string)
(call-with-values
- (lambda () (elf-string-table-intern string-table string))
+ (lambda () (string-table-intern string-table string))
(lambda (table idx)
(set! string-table table)
idx)))
(define (make-object name bv relocs . kwargs)
(let ((name-idx (intern-string! (symbol->string name))))
- (make-elf-object (apply make-elf-section
- #:name name-idx
- #:size (bytevector-length bv)
- kwargs)
- bv relocs
- (list (make-elf-symbol name 0)))))
+ (make-linker-object (apply make-elf-section
+ #:name name-idx
+ #:size (bytevector-length bv)
+ kwargs)
+ bv relocs
+ (list (make-linker-symbol name 0)))))
(define (make-dynamic-section word-size endianness)
(define (make-dynamic-section/32)
(let ((bv (make-bytevector 24 0)))
@@ -57,7 +58,7 @@
(bytevector-u32-set! bv 12 0 endianness)
(bytevector-u32-set! bv 16 DT_NULL endianness)
(bytevector-u32-set! bv 20 0 endianness)
- (values bv (make-elf-reloc 'abs32/1 12 0 '.rtl-text))))
+ (values bv (make-linker-reloc 'abs32/1 12 0 '.rtl-text))))
(define (make-dynamic-section/64)
(let ((bv (make-bytevector 48 0)))
(bytevector-u64-set! bv 0 DT_GUILE_RTL_VERSION endianness)
@@ -66,7 +67,7 @@
(bytevector-u64-set! bv 24 0 endianness)
(bytevector-u64-set! bv 32 DT_NULL endianness)
(bytevector-u64-set! bv 40 0 endianness)
- (values bv (make-elf-reloc 'abs64/1 24 0 '.rtl-text))))
+ (values bv (make-linker-reloc 'abs64/1 24 0 '.rtl-text))))
(call-with-values (lambda ()
(case word-size
((4) (make-dynamic-section/32))
@@ -75,9 +76,9 @@
(lambda (bv reloc)
(make-object '.dynamic bv (list reloc)
#:type SHT_DYNAMIC #:flags SHF_ALLOC))))
- (define (link-string-table)
+ (define (make-string-table)
(intern-string! ".shstrtab")
- (make-object '.shstrtab (link-elf-string-table string-table) '()
+ (make-object '.shstrtab (link-string-table string-table) '()
#:type SHT_STRTAB #:flags 0))
(let* ((word-size (target-word-size))
(endianness (target-endianness))
@@ -85,7 +86,7 @@
(dt (make-dynamic-section word-size endianness))
;; This needs to be linked last, because linking other
;; sections adds entries to the string table.
- (shstrtab (link-string-table)))
+ (shstrtab (make-string-table)))
(link-elf (list text dt shstrtab)
#:endianness endianness #:word-size word-size))))
diff --git a/module/system/vm/elf.scm b/module/system/vm/elf.scm
index 040b274..e2b2454 100644
--- a/module/system/vm/elf.scm
+++ b/module/system/vm/elf.scm
@@ -1,6 +1,6 @@
;;; Guile ELF reader and writer
-;; Copyright (C) 2011, 2012 Free Software Foundation, Inc.
+;; Copyright (C) 2011, 2012, 2013 Free Software Foundation, Inc.
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
@@ -33,12 +33,22 @@
elf-entry elf-phoff elf-shoff elf-flags elf-ehsize
elf-phentsize elf-phnum elf-shentsize elf-shnum elf-shstrndx
+ elf-header-len write-elf-header
+
(make-elf-segment* . make-elf-segment)
elf-segment?
elf-segment-type elf-segment-offset elf-segment-vaddr
elf-segment-paddr elf-segment-filesz elf-segment-memsz
elf-segment-flags elf-segment-align
+ elf-program-header-len write-elf-program-header
+
+ PT_NULL PT_LOAD PT_DYNAMIC PT_INTERP PT_NOTE PT_SHLIB
+ PT_PHDR PT_TLS PT_NUM PT_LOOS PT_GNU_EH_FRAME PT_GNU_STACK
+ PT_GNU_RELRO
+
+ PF_R PF_W PF_X
+
(make-elf-section* . make-elf-section)
elf-section?
elf-section-name elf-section-type elf-section-flags
@@ -46,11 +56,15 @@
elf-section-link elf-section-info elf-section-addralign
elf-section-entsize
+ elf-section-header-len write-elf-section-header
+
make-elf-symbol elf-symbol?
elf-symbol-name elf-symbol-value elf-symbol-size
elf-symbol-info elf-symbol-other elf-symbol-shndx
elf-symbol-binding elf-symbol-type elf-symbol-visibility
+ SHN_UNDEF
+
SHT_NULL SHT_PROGBITS SHT_SYMTAB SHT_STRTAB SHT_RELA
SHT_HASH SHT_DYNAMIC SHT_NOTE SHT_NOBITS SHT_REL SHT_SHLIB
SHT_DYNSYM SHT_INIT_ARRAY SHT_FINI_ARRAY SHT_PREINIT_ARRAY
@@ -72,6 +86,8 @@
DT_GUILE_RTL_VERSION DT_HIGUILE DT_LOOS DT_HIOS DT_LOPROC
DT_HIPROC
+ string-table-ref
+
STB_LOCAL STB_GLOBAL STB_WEAK STB_NUM STB_LOOS STB_GNU
STB_HIOS STB_LOPROC STB_HIPROC
@@ -89,23 +105,7 @@
elf-symbol-table-ref
parse-elf-note
- elf-note-name elf-note-desc elf-note-type
-
- (make-string-table . make-elf-string-table)
- (string-table-intern . elf-string-table-intern)
- (link-string-table . link-elf-string-table)
-
- (make-reloc . make-elf-reloc)
- (make-symbol . make-elf-symbol)
-
- (make-object . make-elf-object)
- (object? . elf-object?)
- (object-section . elf-object-section)
- (object-bv . elf-object-bv)
- (object-relocs . elf-object-relocs)
- (object-symbols . elf-object-symbols)
-
- link-elf))
+ elf-note-name elf-note-desc elf-note-type))
;; #define EI_NIDENT 16
@@ -902,354 +902,3 @@
(bytevector-copy! bv (+ offset 12) name 0 (1- namesz))
(bytevector-copy! bv (+ offset 12 namesz) desc 0 descsz)
(make-elf-note (utf8->string name) desc type)))))
-
-
-\f
-
-;;;
-;;; All of that was the parser. Now, on to a linker.
-;;;
-
-;; A relocation records a reference to a symbol. When the symbol is
-;; resolved to an address, the reloc location will be updated to point
-;; to the address.
-;;
-;; Two types. Abs32/1 and Abs64/1 are absolute offsets in bytes.
-;; Rel32/4 is a relative signed offset in 32-bit units. Either can have
-;; an arbitrary addend as well.
-;;
-(define-record-type <reloc>
- (make-reloc type loc addend symbol)
- reloc?
- (type reloc-type) ;; rel32/4, abs32/1, abs64/1
- (loc reloc-loc)
- (addend reloc-addend)
- (symbol reloc-symbol))
-
-;; A symbol is an association between a name and an address. The
-;; address is always in regard to some particular address space. When
-;; objects come into the linker, their symbols live in the object
-;; address space. When the objects are allocated into ELF segments, the
-;; symbols will be relocated into memory address space, corresponding to
-;; the position the ELF will be loaded at.
-;;
-(define-record-type <symbol>
- (make-symbol name address)
- symbol?
- (name symbol-name)
- (address symbol-address))
-
-(define-record-type <object>
- (make-object section bv relocs symbols)
- object?
- (section object-section)
- (bv object-bv)
- (relocs object-relocs)
- (symbols object-symbols))
-
-(define (make-string-table)
- '(("" 0 #vu8())))
-
-(define (string-table-length table)
- (let ((last (car table)))
- ;; The + 1 is for the trailing NUL byte.
- (+ (cadr last) (bytevector-length (caddr last)) 1)))
-
-(define (string-table-intern table str)
- (cond
- ((assoc str table)
- => (lambda (ent)
- (values table (cadr ent))))
- (else
- (let* ((next (string-table-length table)))
- (values (cons (list str next (string->utf8 str))
- table)
- next)))))
-
-(define (link-string-table table)
- (let ((out (make-bytevector (string-table-length table) 0)))
- (for-each
- (lambda (ent)
- (let ((bytes (caddr ent)))
- (bytevector-copy! bytes 0 out (cadr ent) (bytevector-length bytes))))
- table)
- out))
-
-(define (segment-kind section)
- (let ((flags (elf-section-flags section)))
- (cons (cond
- ((= (elf-section-type section) SHT_DYNAMIC) PT_DYNAMIC)
- ((zero? (logand SHF_ALLOC flags)) PT_NOTE)
- (else PT_LOAD))
- (logior (if (zero? (logand SHF_ALLOC flags))
- 0
- PF_R)
- (if (zero? (logand SHF_EXECINSTR flags))
- 0
- PF_X)
- (if (zero? (logand SHF_WRITE flags))
- 0
- PF_W)))))
-
-(define (group-by-cars ls)
- (let lp ((in ls) (k #f) (group #f) (out '()))
- (cond
- ((null? in)
- (reverse!
- (if group
- (cons (cons k (reverse! group)) out)
- out)))
- ((and group (equal? k (caar in)))
- (lp (cdr in) k (cons (cdar in) group) out))
- (else
- (lp (cdr in) (caar in) (list (cdar in))
- (if group
- (cons (cons k (reverse! group)) out)
- out))))))
-
-(define (collate-objects-into-segments objects)
- (group-by-cars
- (stable-sort!
- (map (lambda (o)
- (cons (segment-kind (object-section o)) o))
- objects)
- (lambda (x y)
- (let ((x-type (caar x)) (y-type (caar y))
- (x-flags (cdar x)) (y-flags (cdar y))
- (x-section (object-section (cdr x)))
- (y-section (object-section (cdr y))))
- (cond
- ((not (equal? x-flags y-flags))
- (< x-flags y-flags))
- ((not (equal? x-type y-type))
- (< x-type y-type))
- ((not (equal? (elf-section-type x-section)
- (elf-section-type y-section)))
- (cond
- ((equal? (elf-section-type x-section) SHT_NOBITS) #t)
- ((equal? (elf-section-type y-section) SHT_NOBITS) #f)
- (else (< (elf-section-type x-section)
- (elf-section-type y-section)))))
- (else
- (< (elf-section-size x-section)
- (elf-section-size y-section)))))))))
-
-(define (align address alignment)
- (+ address
- (modulo (- alignment (modulo address alignment)) alignment)))
-
-(define (fold1 proc ls s0)
- (let lp ((ls ls) (s0 s0))
- (if (null? ls)
- s0
- (lp (cdr ls) (proc (car ls) s0)))))
-
-(define (fold2 proc ls s0 s1)
- (let lp ((ls ls) (s0 s0) (s1 s1))
- (if (null? ls)
- (values s0 s1)
- (receive (s0 s1) (proc (car ls) s0 s1)
- (lp (cdr ls) s0 s1)))))
-
-(define (fold4 proc ls s0 s1 s2 s3)
- (let lp ((ls ls) (s0 s0) (s1 s1) (s2 s2) (s3 s3))
- (if (null? ls)
- (values s0 s1 s2 s3)
- (receive (s0 s1 s2 s3) (proc (car ls) s0 s1 s2 s3)
- (lp (cdr ls) s0 s1 s2 s3)))))
-
-(define (fold5 proc ls s0 s1 s2 s3 s4)
- (let lp ((ls ls) (s0 s0) (s1 s1) (s2 s2) (s3 s3) (s4 s4))
- (if (null? ls)
- (values s0 s1 s2 s3 s4)
- (receive (s0 s1 s2 s3 s4) (proc (car ls) s0 s1 s2 s3 s4)
- (lp (cdr ls) s0 s1 s2 s3 s4)))))
-
-(define (relocate-section-header sec fileaddr memaddr)
- (make-elf-section (elf-section-name sec) (elf-section-type sec)
- (elf-section-flags sec) memaddr
- fileaddr (elf-section-size sec)
- (elf-section-link sec) (elf-section-info sec)
- (elf-section-addralign sec) (elf-section-entsize sec)))
-
-(define *page-size* 4096)
-
-;; Adds object symbols to global table, relocating them from object
-;; address space to memory address space.
-(define (add-symbols symbols offset symtab)
- (fold1 (lambda (symbol symtab)
- (let ((name (symbol-name symbol))
- (addr (symbol-address symbol)))
- (vhash-consq name (make-symbol name (+ addr offset)) symtab)))
- symbols
- symtab))
-
-(define (alloc-segment type flags objects fileaddr memaddr symtab alignment)
- (let* ((loadable? (not (zero? flags)))
- (alignment (fold1 (lambda (o alignment)
- (lcm (elf-section-addralign (object-section o))
- alignment))
- objects
- alignment))
- (fileaddr (align fileaddr alignment))
- (memaddr (align memaddr alignment)))
- (receive (objects fileend memend symtab)
- (fold4 (lambda (o out fileaddr memaddr symtab)
- (let* ((section (object-section o))
- (fileaddr
- (if (= (elf-section-type section) SHT_NOBITS)
- fileaddr
- (align fileaddr (elf-section-addralign section))))
- (memaddr
- (align memaddr (elf-section-addralign section))))
- (values
- (cons (make-object (relocate-section-header section fileaddr
- memaddr)
- (object-bv o)
- (object-relocs o)
- (object-symbols o))
- out)
- (if (= (elf-section-type section) SHT_NOBITS)
- fileaddr
- (+ fileaddr (elf-section-size section)))
- (+ memaddr (elf-section-size section))
- (add-symbols (object-symbols o) memaddr symtab))))
- objects '() fileaddr memaddr symtab)
- (values
- (make-elf-segment* #:type type #:offset fileaddr
- #:vaddr (if loadable? memaddr 0)
- #:filesz (- fileend fileaddr)
- #:memsz (if loadable? (- memend memaddr) 0)
- #:flags flags #:align alignment)
- (reverse objects)
- symtab))))
-
-(define (process-reloc reloc bv file-offset mem-offset symtab endianness)
- (let ((ent (vhash-assq (reloc-symbol reloc) symtab)))
- (unless ent
- (error "Undefined symbol" (reloc-symbol reloc)))
- (let* ((file-loc (+ (reloc-loc reloc) file-offset))
- (mem-loc (+ (reloc-loc reloc) mem-offset))
- (addr (symbol-address (cdr ent))))
- (case (reloc-type reloc)
- ((rel32/4)
- (let ((diff (- addr mem-loc)))
- (unless (zero? (modulo diff 4))
- (error "Bad offset" reloc symbol mem-offset))
- (bytevector-s32-set! bv file-loc
- (+ (/ diff 4) (reloc-addend reloc))
- endianness)))
- ((abs32/1)
- (bytevector-u32-set! bv file-loc addr endianness))
- ((abs64/1)
- (bytevector-u64-set! bv file-loc addr endianness))
- (else
- (error "bad reloc type" reloc))))))
-
-(define (write-object bv o symtab endianness)
- (let* ((section (object-section o))
- (offset (elf-section-offset section))
- (addr (elf-section-addr section))
- (len (elf-section-size section))
- (bytes (object-bv o))
- (relocs (object-relocs o)))
- (if (not (= (elf-section-type section) SHT_NOBITS))
- (begin
- (if (not (= (elf-section-size section) (bytevector-length bytes)))
- (error "unexpected length" section bytes))
- (bytevector-copy! bytes 0 bv offset len)
- (for-each (lambda (reloc)
- (process-reloc reloc bv offset addr symtab endianness))
- relocs)))))
-
-(define (compute-sections-by-name seglists)
- (let lp ((in (apply append (map cdr seglists)))
- (n 1) (out '()) (shstrtab #f))
- (if (null? in)
- (fold1 (lambda (x tail)
- (cond
- ((false-if-exception
- (string-table-ref shstrtab (car x)))
- => (lambda (str) (acons str (cdr x) tail)))
- (else tail)))
- out '())
- (let* ((section (object-section (car in)))
- (bv (object-bv (car in)))
- (name (elf-section-name section)))
- (lp (cdr in) (1+ n) (acons name n out)
- (or shstrtab
- (and (= (elf-section-type section) SHT_STRTAB)
- (equal? (false-if-exception
- (string-table-ref bv name))
- ".shstrtab")
- bv)))))))
-
-;; Given a list of section-header/bytevector pairs, collate the sections
-;; into segments, allocate the segments, allocate the ELF bytevector,
-;; and write the segments into the bytevector, relocating as we go.
-;;
-(define* (link-elf objects #:key
- (page-aligned? #t)
- (endianness (target-endianness))
- (word-size (target-word-size)))
- (let* ((seglists (collate-objects-into-segments objects))
- (sections-by-name (compute-sections-by-name seglists))
- (nsegments (length seglists))
- (nsections (1+ (length objects))) ;; 1+ for the first reserved entry.
- (program-headers-offset (elf-header-len word-size))
- (fileaddr (+ program-headers-offset
- (* nsegments (elf-program-header-len word-size))))
- (memaddr 0))
- (receive (out fileend memend symtab _)
- (fold5
- (lambda (x out fileaddr memaddr symtab prev-flags)
- (let ((type (caar x))
- (flags (cdar x))
- (objects (cdr x)))
- (receive (segment objects symtab)
- (alloc-segment type flags objects fileaddr memaddr symtab
- (if (and page-aligned?
- (not (= flags prev-flags)))
- *page-size*
- 8))
- (values
- (cons (cons segment objects) out)
- (+ (elf-segment-offset segment) (elf-segment-filesz segment))
- (if (zero? (elf-segment-memsz segment))
- memaddr
- (+ (elf-segment-vaddr segment)
- (elf-segment-memsz segment)))
- symtab
- flags))))
- seglists '() fileaddr memaddr vlist-null 0)
- (let* ((out (reverse! out))
- (section-table-offset (+ (align fileend word-size)))
- (fileend (+ section-table-offset
- (* nsections (elf-section-header-len word-size))))
- (bv (make-bytevector fileend 0)))
- (write-elf-header bv #:byte-order endianness #:word-size word-size
- #:phoff program-headers-offset #:phnum nsegments
- #:shoff section-table-offset #:shnum nsections
- #:shstrndx (or (assoc-ref sections-by-name ".shstrtab")
- SHN_UNDEF))
- (write-elf-section-header bv section-table-offset
- endianness word-size
- (make-elf-section* #:type SHT_NULL #:flags 0
- #:addralign 0))
- (fold2 (lambda (x phidx shidx)
- (write-elf-program-header
- bv (+ program-headers-offset
- (* (elf-program-header-len word-size) phidx))
- endianness word-size (car x))
- (values
- (1+ phidx)
- (fold1 (lambda (o shidx)
- (write-object bv o symtab endianness)
- (write-elf-section-header
- bv (+ section-table-offset
- (* (elf-section-header-len word-size) shidx))
- endianness word-size (object-section o))
- (1+ shidx))
- (cdr x) shidx)))
- out 0 1)
- bv))))
diff --git a/module/system/vm/linker.scm b/module/system/vm/linker.scm
new file mode 100644
index 0000000..e9dca71
--- /dev/null
+++ b/module/system/vm/linker.scm
@@ -0,0 +1,442 @@
+;;; Guile ELF linker
+
+;; Copyright (C) 2011, 2012, 2013 Free Software Foundation, Inc.
+
+;;;; This library is free software; you can redistribute it and/or
+;;;; modify it under the terms of the GNU Lesser General Public
+;;;; License as published by the Free Software Foundation; either
+;;;; version 3 of the License, or (at your option) any later version.
+;;;;
+;;;; This library is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;;;; Lesser General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU Lesser General Public
+;;;; License along with this library; if not, write to the Free Software
+;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+;;; Commentary:
+;;;
+;;; A linker combines several linker objects into an executable or a
+;;; loadable library.
+;;;
+;;; There are several common formats for libraries out there. Since
+;;; Guile includes its own linker and loader, we are free to choose any
+;;; format, or make up our own.
+;;;
+;;; There are essentially two requirements for a linker format:
+;;; libraries should be able to be loaded with the minimal amount of
+;;; work; and they should support introspection in some way, in order to
+;;; enable good debugging.
+;;;
+;;; These requirements are somewhat at odds, as loading should not have
+;;; to stumble over features related to introspection. It so happens
+;;; that a lot of smart people have thought about this situation, and
+;;; the ELF format embodies the outcome of their thinking. Guile uses
+;;; ELF as its format, regardless of the platform's native library
+;;; format. It's not inconceivable that Guile could interoperate with
+;;; the native dynamic loader at some point, but it's not a near-term
+;;; goal.
+;;;
+;;; Guile's linker takes a list of objects, sorts them according to
+;;; similarity from the perspective of the loader, then writes them out
+;;; into one big bytevector in ELF format.
+;;;
+;;; It is often the case that different parts of a library need to refer
+;;; to each other. For example, program text may need to refer to a
+;;; constant from writable memory. When the linker places sections
+;;; (linker objects) into specific locations in the linked bytevector,
+;;; it needs to fix up those references. This process is called
+;;; /relocation/. References needing relocations are recorded in
+;;; "linker-reloc" objects, and collected in a list in each
+;;; "linker-object". The actual definitions of the references are
+;;; stored in "linker-symbol" objects, also collected in a list in each
+;;; "linker-object".
+;;;
+;;; By default, the ELF files created by the linker include some padding
+;;; so that different parts of the file can be loaded in with different
+;;; permissions. For example, some parts of the file are read-only and
+;;; thus can be shared between processes. Some parts of the file don't
+;;; need to be loaded at all. However this padding can be too much for
+;;; interactive compilation, when the code is never written out to disk;
+;;; in that case, pass #:page-aligned? #f to `link-elf'.
+;;;
+;;; Code:
+
+(define-module (system vm linker)
+ #:use-module (rnrs bytevectors)
+ #:use-module (system foreign)
+ #:use-module (system base target)
+ #:use-module (srfi srfi-9)
+ #:use-module (ice-9 receive)
+ #:use-module (ice-9 vlist)
+ #:use-module (system vm elf)
+ #:export (make-string-table
+ string-table-intern
+ link-string-table
+
+ make-linker-reloc
+ make-linker-symbol
+
+ make-linker-object
+ linker-object?
+ linker-object-section
+ linker-object-bv
+ linker-object-relocs
+ linker-object-symbols
+
+ link-elf))
+
+;; A relocation records a reference to a symbol. When the symbol is
+;; resolved to an address, the reloc location will be updated to point
+;; to the address.
+;;
+;; Two types. Abs32/1 and Abs64/1 are absolute offsets in bytes.
+;; Rel32/4 is a relative signed offset in 32-bit units. Either can have
+;; an arbitrary addend as well.
+;;
+(define-record-type <linker-reloc>
+ (make-linker-reloc type loc addend symbol)
+ linker-reloc?
+ (type linker-reloc-type) ;; rel32/4, abs32/1, abs64/1
+ (loc linker-reloc-loc)
+ (addend linker-reloc-addend)
+ (symbol linker-reloc-symbol))
+
+;; A symbol is an association between a name and an address. The
+;; address is always in regard to some particular address space. When
+;; objects come into the linker, their symbols live in the object
+;; address space. When the objects are allocated into ELF segments, the
+;; symbols will be relocated into memory address space, corresponding to
+;; the position the ELF will be loaded at.
+;;
+(define-record-type <linker-symbol>
+ (make-linker-symbol name address)
+ linker-symbol?
+ (name linker-symbol-name)
+ (address linker-symbol-address))
+
+(define-record-type <linker-object>
+ (make-linker-object section bv relocs symbols)
+ linker-object?
+ (section linker-object-section)
+ (bv linker-object-bv)
+ (relocs linker-object-relocs)
+ (symbols linker-object-symbols))
+
+(define (make-string-table)
+ '(("" 0 #vu8())))
+
+(define (string-table-length table)
+ (let ((last (car table)))
+ ;; The + 1 is for the trailing NUL byte.
+ (+ (cadr last) (bytevector-length (caddr last)) 1)))
+
+(define (string-table-intern table str)
+ (cond
+ ((assoc str table)
+ => (lambda (ent)
+ (values table (cadr ent))))
+ (else
+ (let* ((next (string-table-length table)))
+ (values (cons (list str next (string->utf8 str))
+ table)
+ next)))))
+
+(define (link-string-table table)
+ (let ((out (make-bytevector (string-table-length table) 0)))
+ (for-each
+ (lambda (ent)
+ (let ((bytes (caddr ent)))
+ (bytevector-copy! bytes 0 out (cadr ent) (bytevector-length bytes))))
+ table)
+ out))
+
+(define (segment-kind section)
+ (let ((flags (elf-section-flags section)))
+ (cons (cond
+ ((= (elf-section-type section) SHT_DYNAMIC) PT_DYNAMIC)
+ ((zero? (logand SHF_ALLOC flags)) PT_NOTE)
+ (else PT_LOAD))
+ (logior (if (zero? (logand SHF_ALLOC flags))
+ 0
+ PF_R)
+ (if (zero? (logand SHF_EXECINSTR flags))
+ 0
+ PF_X)
+ (if (zero? (logand SHF_WRITE flags))
+ 0
+ PF_W)))))
+
+(define (group-by-cars ls)
+ (let lp ((in ls) (k #f) (group #f) (out '()))
+ (cond
+ ((null? in)
+ (reverse!
+ (if group
+ (cons (cons k (reverse! group)) out)
+ out)))
+ ((and group (equal? k (caar in)))
+ (lp (cdr in) k (cons (cdar in) group) out))
+ (else
+ (lp (cdr in) (caar in) (list (cdar in))
+ (if group
+ (cons (cons k (reverse! group)) out)
+ out))))))
+
+(define (collate-objects-into-segments objects)
+ (group-by-cars
+ (stable-sort!
+ (map (lambda (o)
+ (cons (segment-kind (linker-object-section o)) o))
+ objects)
+ (lambda (x y)
+ (let ((x-type (caar x)) (y-type (caar y))
+ (x-flags (cdar x)) (y-flags (cdar y))
+ (x-section (linker-object-section (cdr x)))
+ (y-section (linker-object-section (cdr y))))
+ (cond
+ ((not (equal? x-flags y-flags))
+ (< x-flags y-flags))
+ ((not (equal? x-type y-type))
+ (< x-type y-type))
+ ((not (equal? (elf-section-type x-section)
+ (elf-section-type y-section)))
+ (cond
+ ((equal? (elf-section-type x-section) SHT_NOBITS) #t)
+ ((equal? (elf-section-type y-section) SHT_NOBITS) #f)
+ (else (< (elf-section-type x-section)
+ (elf-section-type y-section)))))
+ (else
+ (< (elf-section-size x-section)
+ (elf-section-size y-section)))))))))
+
+(define (align address alignment)
+ (+ address
+ (modulo (- alignment (modulo address alignment)) alignment)))
+
+(define (fold1 proc ls s0)
+ (let lp ((ls ls) (s0 s0))
+ (if (null? ls)
+ s0
+ (lp (cdr ls) (proc (car ls) s0)))))
+
+(define (fold2 proc ls s0 s1)
+ (let lp ((ls ls) (s0 s0) (s1 s1))
+ (if (null? ls)
+ (values s0 s1)
+ (receive (s0 s1) (proc (car ls) s0 s1)
+ (lp (cdr ls) s0 s1)))))
+
+(define (fold4 proc ls s0 s1 s2 s3)
+ (let lp ((ls ls) (s0 s0) (s1 s1) (s2 s2) (s3 s3))
+ (if (null? ls)
+ (values s0 s1 s2 s3)
+ (receive (s0 s1 s2 s3) (proc (car ls) s0 s1 s2 s3)
+ (lp (cdr ls) s0 s1 s2 s3)))))
+
+(define (fold5 proc ls s0 s1 s2 s3 s4)
+ (let lp ((ls ls) (s0 s0) (s1 s1) (s2 s2) (s3 s3) (s4 s4))
+ (if (null? ls)
+ (values s0 s1 s2 s3 s4)
+ (receive (s0 s1 s2 s3 s4) (proc (car ls) s0 s1 s2 s3 s4)
+ (lp (cdr ls) s0 s1 s2 s3 s4)))))
+
+(define (relocate-section-header sec fileaddr memaddr)
+ (make-elf-section #:name (elf-section-name sec)
+ #:type (elf-section-type sec)
+ #:flags (elf-section-flags sec)
+ #:addr memaddr
+ #:offset fileaddr
+ #:size (elf-section-size sec)
+ #:link (elf-section-link sec)
+ #:info (elf-section-info sec)
+ #:addralign (elf-section-addralign sec)
+ #:entsize (elf-section-entsize sec)))
+
+(define *page-size* 4096)
+
+;; Adds object symbols to global table, relocating them from object
+;; address space to memory address space.
+(define (add-symbols symbols offset symtab)
+ (fold1 (lambda (symbol symtab)
+ (let ((name (linker-symbol-name symbol))
+ (addr (linker-symbol-address symbol)))
+ (when (vhash-assq name symtab)
+ (error "duplicate symbol" name))
+ (vhash-consq name (make-linker-symbol name (+ addr offset)) symtab)))
+ symbols
+ symtab))
+
+(define (alloc-segment type flags objects fileaddr memaddr symtab alignment)
+ (let* ((loadable? (not (zero? flags)))
+ (alignment (fold1 (lambda (o alignment)
+ (lcm (elf-section-addralign
+ (linker-object-section o))
+ alignment))
+ objects
+ alignment))
+ (fileaddr (align fileaddr alignment))
+ (memaddr (align memaddr alignment)))
+ (receive (objects fileend memend symtab)
+ (fold4 (lambda (o out fileaddr memaddr symtab)
+ (let* ((section (linker-object-section o))
+ (fileaddr
+ (if (= (elf-section-type section) SHT_NOBITS)
+ fileaddr
+ (align fileaddr (elf-section-addralign section))))
+ (memaddr
+ (align memaddr (elf-section-addralign section))))
+ (values
+ (cons (make-linker-object
+ (relocate-section-header section fileaddr
+ memaddr)
+ (linker-object-bv o)
+ (linker-object-relocs o)
+ (linker-object-symbols o))
+ out)
+ (if (= (elf-section-type section) SHT_NOBITS)
+ fileaddr
+ (+ fileaddr (elf-section-size section)))
+ (+ memaddr (elf-section-size section))
+ (add-symbols (linker-object-symbols o) memaddr symtab))))
+ objects '() fileaddr memaddr symtab)
+ (values
+ (make-elf-segment #:type type #:offset fileaddr
+ #:vaddr (if loadable? memaddr 0)
+ #:filesz (- fileend fileaddr)
+ #:memsz (if loadable? (- memend memaddr) 0)
+ #:flags flags #:align alignment)
+ (reverse objects)
+ symtab))))
+
+(define (process-reloc reloc bv file-offset mem-offset symtab endianness)
+ (let ((ent (vhash-assq (linker-reloc-symbol reloc) symtab)))
+ (unless ent
+ (error "Undefined symbol" (linker-reloc-symbol reloc)))
+ (let* ((file-loc (+ (linker-reloc-loc reloc) file-offset))
+ (mem-loc (+ (linker-reloc-loc reloc) mem-offset))
+ (addr (linker-symbol-address (cdr ent))))
+ (case (linker-reloc-type reloc)
+ ((rel32/4)
+ (let ((diff (- addr mem-loc)))
+ (unless (zero? (modulo diff 4))
+ (error "Bad offset" reloc symbol mem-offset))
+ (bytevector-s32-set! bv file-loc
+ (+ (/ diff 4) (linker-reloc-addend reloc))
+ endianness)))
+ ((abs32/1)
+ (bytevector-u32-set! bv file-loc addr endianness))
+ ((abs64/1)
+ (bytevector-u64-set! bv file-loc addr endianness))
+ (else
+ (error "bad reloc type" reloc))))))
+
+(define (write-linker-object bv o symtab endianness)
+ (let* ((section (linker-object-section o))
+ (offset (elf-section-offset section))
+ (addr (elf-section-addr section))
+ (len (elf-section-size section))
+ (bytes (linker-object-bv o))
+ (relocs (linker-object-relocs o)))
+ (if (not (= (elf-section-type section) SHT_NOBITS))
+ (begin
+ (if (not (= (elf-section-size section) (bytevector-length bytes)))
+ (error "unexpected length" section bytes))
+ (bytevector-copy! bytes 0 bv offset len)
+ (for-each (lambda (reloc)
+ (process-reloc reloc bv offset addr symtab endianness))
+ relocs)))))
+
+(define (compute-sections-by-name seglists)
+ (let lp ((in (apply append (map cdr seglists)))
+ (n 1) (out '()) (shstrtab #f))
+ (if (null? in)
+ (fold1 (lambda (x tail)
+ (cond
+ ((false-if-exception
+ (string-table-ref shstrtab (car x)))
+ => (lambda (str) (acons str (cdr x) tail)))
+ (else tail)))
+ out '())
+ (let* ((section (linker-object-section (car in)))
+ (bv (linker-object-bv (car in)))
+ (name (elf-section-name section)))
+ (lp (cdr in) (1+ n) (acons name n out)
+ (or shstrtab
+ (and (= (elf-section-type section) SHT_STRTAB)
+ (equal? (false-if-exception
+ (string-table-ref bv name))
+ ".shstrtab")
+ bv)))))))
+
+;; Given a list of section-header/bytevector pairs, collate the sections
+;; into segments, allocate the segments, allocate the ELF bytevector,
+;; and write the segments into the bytevector, relocating as we go.
+;;
+(define* (link-elf objects #:key
+ (page-aligned? #t)
+ (endianness (target-endianness))
+ (word-size (target-word-size)))
+ (let* ((seglists (collate-objects-into-segments objects))
+ (sections-by-name (compute-sections-by-name seglists))
+ (nsegments (length seglists))
+ (nsections (1+ (length objects))) ;; 1+ for the first reserved entry.
+ (program-headers-offset (elf-header-len word-size))
+ (fileaddr (+ program-headers-offset
+ (* nsegments (elf-program-header-len word-size))))
+ (memaddr 0))
+ (receive (out fileend memend symtab _)
+ (fold5
+ (lambda (x out fileaddr memaddr symtab prev-flags)
+ (let ((type (caar x))
+ (flags (cdar x))
+ (objects (cdr x)))
+ (receive (segment objects symtab)
+ (alloc-segment type flags objects fileaddr memaddr symtab
+ (if (and page-aligned?
+ (not (= flags prev-flags)))
+ *page-size*
+ 8))
+ (values
+ (cons (cons segment objects) out)
+ (+ (elf-segment-offset segment) (elf-segment-filesz segment))
+ (if (zero? (elf-segment-memsz segment))
+ memaddr
+ (+ (elf-segment-vaddr segment)
+ (elf-segment-memsz segment)))
+ symtab
+ flags))))
+ seglists '() fileaddr memaddr vlist-null 0)
+ (let* ((out (reverse! out))
+ (section-table-offset (+ (align fileend word-size)))
+ (fileend (+ section-table-offset
+ (* nsections (elf-section-header-len word-size))))
+ (bv (make-bytevector fileend 0)))
+ (write-elf-header bv #:byte-order endianness #:word-size word-size
+ #:phoff program-headers-offset #:phnum nsegments
+ #:shoff section-table-offset #:shnum nsections
+ #:shstrndx (or (assoc-ref sections-by-name ".shstrtab")
+ SHN_UNDEF))
+ (write-elf-section-header bv section-table-offset
+ endianness word-size
+ (make-elf-section #:type SHT_NULL #:flags 0
+ #:addralign 0))
+ (fold2 (lambda (x phidx shidx)
+ (write-elf-program-header
+ bv (+ program-headers-offset
+ (* (elf-program-header-len word-size) phidx))
+ endianness word-size (car x))
+ (values
+ (1+ phidx)
+ (fold1 (lambda (o shidx)
+ (write-linker-object bv o symtab endianness)
+ (write-elf-section-header
+ bv (+ section-table-offset
+ (* (elf-section-header-len word-size) shidx))
+ endianness word-size (linker-object-section o))
+ (1+ shidx))
+ (cdr x) shidx)))
+ out 0 1)
+ bv))))
diff --git a/test-suite/tests/linker.test b/test-suite/tests/linker.test
new file mode 100644
index 0000000..7ea2631
--- /dev/null
+++ b/test-suite/tests/linker.test
@@ -0,0 +1,86 @@
+;;;; linker.test -*- scheme -*-
+;;;;
+;;;; Copyright 2013 Free Software Foundation, Inc.
+;;;;
+;;;; This library is free software; you can redistribute it and/or
+;;;; modify it under the terms of the GNU Lesser General Public
+;;;; License as published by the Free Software Foundation; either
+;;;; version 3 of the License, or (at your option) any later version.
+;;;;
+;;;; This library is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;;;; Lesser General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU Lesser General Public
+;;;; License along with this library; if not, write to the Free Software
+;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+(define-module (test-suite test-linker)
+ #:use-module (test-suite lib)
+ #:use-module (rnrs bytevectors)
+ #:use-module (system base target)
+ #:use-module (system vm elf)
+ #:use-module (system vm linker))
+
+(define (link-elf-with-one-main-section name bytes)
+ (let ((string-table (make-string-table)))
+ (define (intern-string! string)
+ (call-with-values
+ (lambda () (string-table-intern string-table string))
+ (lambda (table idx)
+ (set! string-table table)
+ idx)))
+ (define (make-object name bv relocs . kwargs)
+ (let ((name-idx (intern-string! (symbol->string name))))
+ (make-linker-object (apply make-elf-section
+ #:name name-idx
+ #:size (bytevector-length bv)
+ kwargs)
+ bv relocs
+ (list (make-linker-symbol name 0)))))
+ (define (make-string-table)
+ (intern-string! ".shstrtab")
+ (make-object '.shstrtab (link-string-table string-table) '()
+ #:type SHT_STRTAB #:flags 0))
+ (let* ((word-size (target-word-size))
+ (endianness (target-endianness))
+ (sec (make-object name bytes '()))
+ ;; This needs to be linked last, because linking other
+ ;; sections adds entries to the string table.
+ (shstrtab (make-string-table)))
+ (link-elf (list sec shstrtab)
+ #:endianness endianness #:word-size word-size))))
+
+(with-test-prefix "simple"
+ (define foo-bytes #vu8(0 1 2 3 4 5))
+ (define bytes #f)
+ (define elf #f)
+
+ (define (bytevectors-equal? bv-a bv-b start-a start-b size)
+ (or (zero? size)
+ (and (equal? (bytevector-u8-ref bv-a start-a)
+ (bytevector-u8-ref bv-b start-b))
+ (bytevectors-equal? bv-a bv-b (1+ start-a) (1+ start-b)
+ (1- size)))))
+
+ (pass-if "linking succeeds"
+ (begin
+ (set! bytes (link-elf-with-one-main-section '.foo foo-bytes))
+ #t))
+
+ (pass-if "parsing succeeds"
+ (begin
+ (set! elf (parse-elf bytes))
+ (elf? elf)))
+
+ ;; 3 sections: the initial NULL section, .foo, and .shstrtab.
+ (pass-if-equal 3 (elf-shnum elf))
+
+ (pass-if ".foo section checks out"
+ (let ((sec (assoc-ref (elf-sections-by-name elf) ".foo")))
+ (and sec
+ (= (elf-section-size sec) (bytevector-length foo-bytes))
+ (bytevectors-equal? bytes foo-bytes
+ (elf-section-offset sec) 0
+ (bytevector-length foo-bytes))))))
--
1.7.10.4
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH 1/6] split linker out of elf module
2013-05-18 15:05 ` [PATCH 1/6] split linker out of elf module Andy Wingo
@ 2013-05-22 20:39 ` Ludovic Courtès
2013-05-23 7:26 ` Andy Wingo
0 siblings, 1 reply; 24+ messages in thread
From: Ludovic Courtès @ 2013-05-22 20:39 UTC (permalink / raw)
To: guile-devel
Hello!
Note: I’m just familiarizing with the new code, so I may miss things,
and may make mostly superficial comments.
Andy Wingo <wingo@pobox.com> skribis:
> * module/Makefile.am:
> * module/system/vm/linker.scm: New file, split out of (system vm elf).
>
> * module/system/vm/elf.scm: Remove linking capabilities.
>
> * module/language/objcode/elf.scm: Adapt caller to use (system vm
> linker).
>
> * test-suite/tests/linker.test: New test.
Looks good. Would be nice to mention variable names too.
It seems elf.scm would benefit from byte structures similar to those
announced by Taylan on guile-user. That’s more for aesthetics though,
and definitely not blocking.
Docstrings are lacking in those files.
Ludo’.
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 1/6] split linker out of elf module
2013-05-22 20:39 ` Ludovic Courtès
@ 2013-05-23 7:26 ` Andy Wingo
0 siblings, 0 replies; 24+ messages in thread
From: Andy Wingo @ 2013-05-23 7:26 UTC (permalink / raw)
To: Ludovic Courtès; +Cc: guile-devel
On Wed 22 May 2013 22:39, ludo@gnu.org (Ludovic Courtès) writes:
> Looks good. Would be nice to mention variable names too.
Will do.
> It seems elf.scm would benefit from byte structures similar to those
> announced by Taylan on guile-user. That’s more for aesthetics though,
> and definitely not blocking.
Yes, but it's tricky. On-disk you can be 32-bit or 64-bit, and with
differing endiannesses. Makes it tricky to read. And on the writing
side you have the same, but also that you usually start from Scheme data
instead of bytevectors. I'm sure there is some useful abstraction
lurking there, but I haven't found it yet.
> Docstrings are lacking in those files.
Will fix.
Andy
--
http://wingolog.org/
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH 2/6] ELF refactor and consequent linker simplifications
2013-05-18 15:05 wip-linker: Refactor ELF linker and loader Andy Wingo
2013-05-18 15:05 ` [PATCH 1/6] split linker out of elf module Andy Wingo
@ 2013-05-18 15:05 ` Andy Wingo
2013-05-22 20:44 ` Ludovic Courtès
2013-05-18 15:05 ` [PATCH 3/6] elf: add accessors for header members that might need relocation Andy Wingo
` (4 subsequent siblings)
6 siblings, 1 reply; 24+ messages in thread
From: Andy Wingo @ 2013-05-18 15:05 UTC (permalink / raw)
To: guile-devel; +Cc: Andy Wingo
* module/system/vm/elf.scm: Add commentary.
(make-elf): Add a constructor similar to make-elf-segment and
make-elf-section.
(write-elf32-header, write-elf64-header, write-elf-header): Take an
<elf> instead of all the fields separately.
(<elf-segment>, <elf-section>): Add "index" property. Adapt
constructors accordingly.
* module/language/objcode/elf.scm (bytecode->elf): Arrange to set the
section indexes when creating ELF sections.
* module/system/vm/linker.scm (alloc-segment, relocate-section-header):
Arrange to set segment and section indexes.
(find-shstrndx): New helper, replaces compute-sections-by-name. Now
that sections know their indexes, this is easier.
(allocate-elf, write-elf): New helpers, factored out of link-elf.
Easier now that sections have indexes.
(link-elf): Simplify. Check that the incoming objects have sensible
numbers.
* test-suite/tests/linker.test: Update to set #:index on the linker
objects.
---
module/language/objcode/elf.scm | 17 +--
module/system/vm/elf.scm | 188 +++++++++++++++++++--------------
module/system/vm/linker.scm | 223 +++++++++++++++++++++------------------
test-suite/tests/linker.test | 7 +-
4 files changed, 238 insertions(+), 197 deletions(-)
diff --git a/module/language/objcode/elf.scm b/module/language/objcode/elf.scm
index 1edfdcf..981c398 100644
--- a/module/language/objcode/elf.scm
+++ b/module/language/objcode/elf.scm
@@ -41,15 +41,16 @@
(lambda (table idx)
(set! string-table table)
idx)))
- (define (make-object name bv relocs . kwargs)
+ (define (make-object index name bv relocs . kwargs)
(let ((name-idx (intern-string! (symbol->string name))))
(make-linker-object (apply make-elf-section
+ #:index index
#:name name-idx
#:size (bytevector-length bv)
kwargs)
bv relocs
(list (make-linker-symbol name 0)))))
- (define (make-dynamic-section word-size endianness)
+ (define (make-dynamic-section index word-size endianness)
(define (make-dynamic-section/32)
(let ((bv (make-bytevector 24 0)))
(bytevector-u32-set! bv 0 DT_GUILE_RTL_VERSION endianness)
@@ -74,19 +75,19 @@
((8) (make-dynamic-section/64))
(else (error "unexpected word size" word-size))))
(lambda (bv reloc)
- (make-object '.dynamic bv (list reloc)
+ (make-object index '.dynamic bv (list reloc)
#:type SHT_DYNAMIC #:flags SHF_ALLOC))))
- (define (make-string-table)
+ (define (make-string-table index)
(intern-string! ".shstrtab")
- (make-object '.shstrtab (link-string-table string-table) '()
+ (make-object index '.shstrtab (link-string-table string-table) '()
#:type SHT_STRTAB #:flags 0))
(let* ((word-size (target-word-size))
(endianness (target-endianness))
- (text (make-object '.rtl-text bv '()))
- (dt (make-dynamic-section word-size endianness))
+ (text (make-object 1 '.rtl-text bv '()))
+ (dt (make-dynamic-section 2 word-size endianness))
;; This needs to be linked last, because linking other
;; sections adds entries to the string table.
- (shstrtab (make-string-table)))
+ (shstrtab (make-string-table 3)))
(link-elf (list text dt shstrtab)
#:endianness endianness #:word-size word-size))))
diff --git a/module/system/vm/elf.scm b/module/system/vm/elf.scm
index e2b2454..efa9782 100644
--- a/module/system/vm/elf.scm
+++ b/module/system/vm/elf.scm
@@ -16,6 +16,19 @@
;;;; License along with this library; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;;; Commentary:
+;;;
+;;; A module to read and write Executable and Linking Format (ELF)
+;;; files.
+;;;
+;;; This module exports a number of record types that represent the
+;;; various parts that make up ELF files. Fundamentally this is the
+;;; main header, the segment headers (program headers), and the section
+;;; headers. It also exports bindings for symbolic constants and
+;;; utilities to parse and write special kinds of ELF sections.
+;;;
+;;; See elf(5) for more information on ELF.
+;;;
;;; Code:
(define-module (system vm elf)
@@ -27,7 +40,8 @@
#:use-module (ice-9 vlist)
#:export (has-elf-header?
- make-elf elf?
+ (make-elf* . make-elf)
+ elf?
elf-bytes elf-word-size elf-byte-order
elf-abi elf-type elf-machine-type
elf-entry elf-phoff elf-shoff elf-flags elf-ehsize
@@ -37,6 +51,7 @@
(make-elf-segment* . make-elf-segment)
elf-segment?
+ elf-segment-index
elf-segment-type elf-segment-offset elf-segment-vaddr
elf-segment-paddr elf-segment-filesz elf-segment-memsz
elf-segment-flags elf-segment-align
@@ -51,6 +66,7 @@
(make-elf-section* . make-elf-section)
elf-section?
+ elf-section-index
elf-section-name elf-section-type elf-section-flags
elf-section-addr elf-section-offset elf-section-size
elf-section-link elf-section-info elf-section-addralign
@@ -242,6 +258,26 @@
(shnum elf-shnum)
(shstrndx elf-shstrndx))
+(define* (make-elf* #:key (bytes #f)
+ (byte-order (target-endianness))
+ (word-size (target-word-size))
+ (abi ELFOSABI_STANDALONE)
+ (type ET_DYN)
+ (machine-type EM_NONE)
+ (entry 0)
+ (phoff (elf-header-len word-size))
+ (shoff -1)
+ (flags 0)
+ (ehsize (elf-header-len word-size))
+ (phentsize (elf-program-header-len word-size))
+ (phnum 0)
+ (shentsize (elf-section-header-len word-size))
+ (shnum 0)
+ (shstrndx SHN_UNDEF))
+ (make-elf bytes word-size byte-order abi type machine-type
+ entry phoff shoff flags ehsize
+ phentsize phnum shentsize shnum shstrndx))
+
(define (parse-elf32 bv byte-order)
(make-elf bv 4 byte-order
(bytevector-u8-ref bv 7)
@@ -276,28 +312,27 @@
(bytevector-u8-set! bv 14 0)
(bytevector-u8-set! bv 15 0))
-(define (write-elf32 bv byte-order abi type machine-type
- entry phoff shoff flags ehsize phentsize phnum
- shentsize shnum shstrndx)
- (write-elf-ident bv ELFCLASS32
- (case byte-order
- ((little) ELFDATA2LSB)
- ((big) ELFDATA2MSB)
- (else (error "unknown endianness" byte-order)))
- abi)
- (bytevector-u16-set! bv 16 type byte-order)
- (bytevector-u16-set! bv 18 machine-type byte-order)
- (bytevector-u32-set! bv 20 EV_CURRENT byte-order)
- (bytevector-u32-set! bv 24 entry byte-order)
- (bytevector-u32-set! bv 28 phoff byte-order)
- (bytevector-u32-set! bv 32 shoff byte-order)
- (bytevector-u32-set! bv 36 flags byte-order)
- (bytevector-u16-set! bv 40 ehsize byte-order)
- (bytevector-u16-set! bv 42 phentsize byte-order)
- (bytevector-u16-set! bv 44 phnum byte-order)
- (bytevector-u16-set! bv 46 shentsize byte-order)
- (bytevector-u16-set! bv 48 shnum byte-order)
- (bytevector-u16-set! bv 50 shstrndx byte-order))
+(define (write-elf32-header bv elf)
+ (let ((byte-order (elf-byte-order elf)))
+ (write-elf-ident bv ELFCLASS32
+ (case byte-order
+ ((little) ELFDATA2LSB)
+ ((big) ELFDATA2MSB)
+ (else (error "unknown endianness" byte-order)))
+ (elf-abi elf))
+ (bytevector-u16-set! bv 16 (elf-type elf) byte-order)
+ (bytevector-u16-set! bv 18 (elf-machine-type elf) byte-order)
+ (bytevector-u32-set! bv 20 EV_CURRENT byte-order)
+ (bytevector-u32-set! bv 24 (elf-entry elf) byte-order)
+ (bytevector-u32-set! bv 28 (elf-phoff elf) byte-order)
+ (bytevector-u32-set! bv 32 (elf-shoff elf) byte-order)
+ (bytevector-u32-set! bv 36 (elf-flags elf) byte-order)
+ (bytevector-u16-set! bv 40 (elf-ehsize elf) byte-order)
+ (bytevector-u16-set! bv 42 (elf-phentsize elf) byte-order)
+ (bytevector-u16-set! bv 44 (elf-phnum elf) byte-order)
+ (bytevector-u16-set! bv 46 (elf-shentsize elf) byte-order)
+ (bytevector-u16-set! bv 48 (elf-shnum elf) byte-order)
+ (bytevector-u16-set! bv 50 (elf-shstrndx elf) byte-order)))
(define (parse-elf64 bv byte-order)
(make-elf bv 8 byte-order
@@ -315,28 +350,27 @@
(bytevector-u16-ref bv 60 byte-order)
(bytevector-u16-ref bv 62 byte-order)))
-(define (write-elf64 bv byte-order abi type machine-type
- entry phoff shoff flags ehsize phentsize phnum
- shentsize shnum shstrndx)
- (write-elf-ident bv ELFCLASS64
- (case byte-order
- ((little) ELFDATA2LSB)
- ((big) ELFDATA2MSB)
- (else (error "unknown endianness" byte-order)))
- abi)
- (bytevector-u16-set! bv 16 type byte-order)
- (bytevector-u16-set! bv 18 machine-type byte-order)
- (bytevector-u32-set! bv 20 EV_CURRENT byte-order)
- (bytevector-u64-set! bv 24 entry byte-order)
- (bytevector-u64-set! bv 32 phoff byte-order)
- (bytevector-u64-set! bv 40 shoff byte-order)
- (bytevector-u32-set! bv 48 flags byte-order)
- (bytevector-u16-set! bv 52 ehsize byte-order)
- (bytevector-u16-set! bv 54 phentsize byte-order)
- (bytevector-u16-set! bv 56 phnum byte-order)
- (bytevector-u16-set! bv 58 shentsize byte-order)
- (bytevector-u16-set! bv 60 shnum byte-order)
- (bytevector-u16-set! bv 62 shstrndx byte-order))
+(define (write-elf64-header bv elf)
+ (let ((byte-order (elf-byte-order elf)))
+ (write-elf-ident bv ELFCLASS64
+ (case byte-order
+ ((little) ELFDATA2LSB)
+ ((big) ELFDATA2MSB)
+ (else (error "unknown endianness" byte-order)))
+ (elf-abi elf))
+ (bytevector-u16-set! bv 16 (elf-type elf) byte-order)
+ (bytevector-u16-set! bv 18 (elf-machine-type elf) byte-order)
+ (bytevector-u32-set! bv 20 EV_CURRENT byte-order)
+ (bytevector-u64-set! bv 24 (elf-entry elf) byte-order)
+ (bytevector-u64-set! bv 32 (elf-phoff elf) byte-order)
+ (bytevector-u64-set! bv 40 (elf-shoff elf) byte-order)
+ (bytevector-u32-set! bv 48 (elf-flags elf) byte-order)
+ (bytevector-u16-set! bv 52 (elf-ehsize elf) byte-order)
+ (bytevector-u16-set! bv 54 (elf-phentsize elf) byte-order)
+ (bytevector-u16-set! bv 56 (elf-phnum elf) byte-order)
+ (bytevector-u16-set! bv 58 (elf-shentsize elf) byte-order)
+ (bytevector-u16-set! bv 60 (elf-shnum elf) byte-order)
+ (bytevector-u16-set! bv 62 (elf-shstrndx elf) byte-order)))
(define (parse-elf bv)
(cond
@@ -354,28 +388,12 @@
(else
(error "Invalid ELF" bv))))
-(define* (write-elf-header bv #:key
- (byte-order (target-endianness))
- (word-size (target-word-size))
- (abi ELFOSABI_STANDALONE)
- (type ET_DYN)
- (machine-type EM_NONE)
- (entry 0)
- (phoff (elf-header-len word-size))
- (shoff -1)
- (flags 0)
- (ehsize (elf-header-len word-size))
- (phentsize (elf-program-header-len word-size))
- (phnum 0)
- (shentsize (elf-section-header-len word-size))
- (shnum 0)
- (shstrndx SHN_UNDEF))
- ((case word-size
- ((4) write-elf32)
- ((8) write-elf64)
- (else (error "unknown word size" word-size)))
- bv byte-order abi type machine-type entry phoff shoff
- flags ehsize phentsize phnum shentsize shnum shstrndx))
+(define* (write-elf-header bv elf)
+ ((case (elf-word-size elf)
+ ((4) write-elf32-header)
+ ((8) write-elf64-header)
+ (else (error "unknown word size" (elf-word-size elf))))
+ bv elf))
;;
;; Segment types
@@ -402,8 +420,9 @@
(define PF_R (ash 1 2)) ; Segment is readable
(define-record-type <elf-segment>
- (make-elf-segment type offset vaddr paddr filesz memsz flags align)
+ (make-elf-segment index type offset vaddr paddr filesz memsz flags align)
elf-segment?
+ (index elf-segment-index)
(type elf-segment-type)
(offset elf-segment-offset)
(vaddr elf-segment-vaddr)
@@ -413,11 +432,11 @@
(flags elf-segment-flags)
(align elf-segment-align))
-(define* (make-elf-segment* #:key (type PT_LOAD) (offset 0) (vaddr 0)
+(define* (make-elf-segment* #:key (index -1) (type PT_LOAD) (offset 0) (vaddr 0)
(paddr 0) (filesz 0) (memsz filesz)
(flags (logior PF_W PF_R))
(align 8))
- (make-elf-segment type offset vaddr paddr filesz memsz flags align))
+ (make-elf-segment index type offset vaddr paddr filesz memsz flags align))
;; typedef struct {
;; uint32_t p_type;
@@ -430,9 +449,10 @@
;; uint32_t p_align;
;; } Elf32_Phdr;
-(define (parse-elf32-program-header bv offset byte-order)
+(define (parse-elf32-program-header index bv offset byte-order)
(if (<= (+ offset 32) (bytevector-length bv))
- (make-elf-segment (bytevector-u32-ref bv offset byte-order)
+ (make-elf-segment index
+ (bytevector-u32-ref bv offset byte-order)
(bytevector-u32-ref bv (+ offset 4) byte-order)
(bytevector-u32-ref bv (+ offset 8) byte-order)
(bytevector-u32-ref bv (+ offset 12) byte-order)
@@ -466,9 +486,10 @@
;; NB: position of `flags' is different!
-(define (parse-elf64-program-header bv offset byte-order)
+(define (parse-elf64-program-header index bv offset byte-order)
(if (<= (+ offset 56) (bytevector-length bv))
- (make-elf-segment (bytevector-u32-ref bv offset byte-order)
+ (make-elf-segment index
+ (bytevector-u32-ref bv offset byte-order)
(bytevector-u64-ref bv (+ offset 8) byte-order)
(bytevector-u64-ref bv (+ offset 16) byte-order)
(bytevector-u64-ref bv (+ offset 24) byte-order)
@@ -519,8 +540,10 @@
(lp (1- n) (cons (elf-segment elf (1- n)) out)))))
(define-record-type <elf-section>
- (make-elf-section name type flags addr offset size link info addralign entsize)
+ (make-elf-section index name type flags
+ addr offset size link info addralign entsize)
elf-section?
+ (index elf-section-index)
(name elf-section-name)
(type elf-section-type)
(flags elf-section-flags)
@@ -532,10 +555,10 @@
(addralign elf-section-addralign)
(entsize elf-section-entsize))
-(define* (make-elf-section* #:key (name 0) (type SHT_PROGBITS)
+(define* (make-elf-section* #:key (index SHN_UNDEF) (name 0) (type SHT_PROGBITS)
(flags SHF_ALLOC) (addr 0) (offset 0) (size 0)
(link 0) (info 0) (addralign 8) (entsize 0))
- (make-elf-section name type flags addr offset size link info addralign
+ (make-elf-section index name type flags addr offset size link info addralign
entsize))
;; typedef struct {
@@ -551,9 +574,10 @@
;; uint32_t sh_entsize;
;; } Elf32_Shdr;
-(define (parse-elf32-section-header bv offset byte-order)
+(define (parse-elf32-section-header index bv offset byte-order)
(if (<= (+ offset 40) (bytevector-length bv))
- (make-elf-section (bytevector-u32-ref bv offset byte-order)
+ (make-elf-section index
+ (bytevector-u32-ref bv offset byte-order)
(bytevector-u32-ref bv (+ offset 4) byte-order)
(bytevector-u32-ref bv (+ offset 8) byte-order)
(bytevector-u32-ref bv (+ offset 12) byte-order)
@@ -597,9 +621,10 @@
((8) 64)
(else (error "bad word size" word-size))))
-(define (parse-elf64-section-header bv offset byte-order)
+(define (parse-elf64-section-header index bv offset byte-order)
(if (<= (+ offset 64) (bytevector-length bv))
- (make-elf-section (bytevector-u32-ref bv offset byte-order)
+ (make-elf-section index
+ (bytevector-u32-ref bv offset byte-order)
(bytevector-u32-ref bv (+ offset 4) byte-order)
(bytevector-u64-ref bv (+ offset 8) byte-order)
(bytevector-u64-ref bv (+ offset 16) byte-order)
@@ -630,6 +655,7 @@
((4) parse-elf32-section-header)
((8) parse-elf64-section-header)
(else (error "unhandled pointer size")))
+ n
(elf-bytes elf)
(+ (elf-shoff elf) (* n (elf-shentsize elf)))
(elf-byte-order elf)))
diff --git a/module/system/vm/linker.scm b/module/system/vm/linker.scm
index e9dca71..580981a 100644
--- a/module/system/vm/linker.scm
+++ b/module/system/vm/linker.scm
@@ -68,15 +68,13 @@
#:use-module (rnrs bytevectors)
#:use-module (system foreign)
#:use-module (system base target)
+ #:use-module ((srfi srfi-1) #:select (append-map))
#:use-module (srfi srfi-9)
#:use-module (ice-9 receive)
#:use-module (ice-9 vlist)
+ #:use-module (ice-9 match)
#:use-module (system vm elf)
- #:export (make-string-table
- string-table-intern
- link-string-table
-
- make-linker-reloc
+ #:export (make-linker-reloc
make-linker-symbol
make-linker-object
@@ -86,6 +84,10 @@
linker-object-relocs
linker-object-symbols
+ make-string-table
+ string-table-intern
+ link-string-table
+
link-elf))
;; A relocation records a reference to a symbol. When the symbol is
@@ -222,13 +224,6 @@
s0
(lp (cdr ls) (proc (car ls) s0)))))
-(define (fold2 proc ls s0 s1)
- (let lp ((ls ls) (s0 s0) (s1 s1))
- (if (null? ls)
- (values s0 s1)
- (receive (s0 s1) (proc (car ls) s0 s1)
- (lp (cdr ls) s0 s1)))))
-
(define (fold4 proc ls s0 s1 s2 s3)
(let lp ((ls ls) (s0 s0) (s1 s1) (s2 s2) (s3 s3))
(if (null? ls)
@@ -236,15 +231,9 @@
(receive (s0 s1 s2 s3) (proc (car ls) s0 s1 s2 s3)
(lp (cdr ls) s0 s1 s2 s3)))))
-(define (fold5 proc ls s0 s1 s2 s3 s4)
- (let lp ((ls ls) (s0 s0) (s1 s1) (s2 s2) (s3 s3) (s4 s4))
- (if (null? ls)
- (values s0 s1 s2 s3 s4)
- (receive (s0 s1 s2 s3 s4) (proc (car ls) s0 s1 s2 s3 s4)
- (lp (cdr ls) s0 s1 s2 s3 s4)))))
-
(define (relocate-section-header sec fileaddr memaddr)
- (make-elf-section #:name (elf-section-name sec)
+ (make-elf-section #:index (elf-section-index sec)
+ #:name (elf-section-name sec)
#:type (elf-section-type sec)
#:flags (elf-section-flags sec)
#:addr memaddr
@@ -269,7 +258,8 @@
symbols
symtab))
-(define (alloc-segment type flags objects fileaddr memaddr symtab alignment)
+(define (alloc-segment phidx type flags objects
+ fileaddr memaddr symtab alignment)
(let* ((loadable? (not (zero? flags)))
(alignment (fold1 (lambda (o alignment)
(lcm (elf-section-addralign
@@ -303,7 +293,8 @@
(add-symbols (linker-object-symbols o) memaddr symtab))))
objects '() fileaddr memaddr symtab)
(values
- (make-elf-segment #:type type #:offset fileaddr
+ (make-elf-segment #:index phidx
+ #:type type #:offset fileaddr
#:vaddr (if loadable? memaddr 0)
#:filesz (- fileend fileaddr)
#:memsz (if loadable? (- memend memaddr) 0)
@@ -342,34 +333,113 @@
(relocs (linker-object-relocs o)))
(if (not (= (elf-section-type section) SHT_NOBITS))
(begin
- (if (not (= (elf-section-size section) (bytevector-length bytes)))
+ (if (not (= len (bytevector-length bytes)))
(error "unexpected length" section bytes))
(bytevector-copy! bytes 0 bv offset len)
(for-each (lambda (reloc)
(process-reloc reloc bv offset addr symtab endianness))
relocs)))))
-(define (compute-sections-by-name seglists)
- (let lp ((in (apply append (map cdr seglists)))
- (n 1) (out '()) (shstrtab #f))
- (if (null? in)
- (fold1 (lambda (x tail)
- (cond
- ((false-if-exception
- (string-table-ref shstrtab (car x)))
- => (lambda (str) (acons str (cdr x) tail)))
- (else tail)))
- out '())
- (let* ((section (linker-object-section (car in)))
- (bv (linker-object-bv (car in)))
- (name (elf-section-name section)))
- (lp (cdr in) (1+ n) (acons name n out)
- (or shstrtab
- (and (= (elf-section-type section) SHT_STRTAB)
- (equal? (false-if-exception
- (string-table-ref bv name))
- ".shstrtab")
- bv)))))))
+(define (find-shstrndx objects)
+ (or-map (lambda (object)
+ (let* ((section (linker-object-section object))
+ (bv (linker-object-bv object))
+ (name (elf-section-name section)))
+ (and (= (elf-section-type section) SHT_STRTAB)
+ (equal? (false-if-exception (string-table-ref bv name))
+ ".shstrtab")
+ (elf-section-index section))))
+ objects))
+
+;; objects ::= list of <linker-object>
+;; => 3 values: ELF header, program headers, objects
+(define (allocate-elf objects page-aligned? endianness word-size)
+ (let* ((seglists (collate-objects-into-segments objects))
+ (nsegments (length seglists))
+ (nsections (1+ (length objects))) ;; 1+ for the first reserved entry.
+ (program-headers-offset (elf-header-len word-size))
+ (fileaddr (+ program-headers-offset
+ (* nsegments (elf-program-header-len word-size))))
+ (memaddr 0))
+ (let lp ((seglists seglists)
+ (segments '())
+ (objects '())
+ (phidx 0)
+ (fileaddr fileaddr)
+ (memaddr memaddr)
+ (symtab vlist-null)
+ (prev-flags 0))
+ (match seglists
+ ((((type . flags) objs-in ...) seglists ...)
+ (receive (segment objs-out symtab)
+ (alloc-segment phidx type flags objs-in fileaddr memaddr symtab
+ (if (and page-aligned?
+ (not (= flags prev-flags)))
+ *page-size*
+ 8))
+ (lp seglists
+ (cons segment segments)
+ (fold1 cons objs-out objects)
+ (1+ phidx)
+ (+ (elf-segment-offset segment) (elf-segment-filesz segment))
+ (if (zero? (elf-segment-memsz segment))
+ memaddr
+ (+ (elf-segment-vaddr segment)
+ (elf-segment-memsz segment)))
+ symtab
+ flags)))
+ (()
+ (let ((section-table-offset (+ (align fileaddr word-size))))
+ (values
+ (make-elf #:byte-order endianness #:word-size word-size
+ #:phoff program-headers-offset #:phnum nsegments
+ #:shoff section-table-offset #:shnum nsections
+ #:shstrndx (or (find-shstrndx objects) SHN_UNDEF))
+ (reverse segments)
+ (let ((null-section (make-elf-section #:index 0 #:type SHT_NULL
+ #:flags 0 #:addralign 0)))
+ (cons (make-linker-object null-section #vu8() '() '())
+ (reverse objects)))
+ symtab)))))))
+
+(define (write-elf header segments objects symtab)
+ (define (phoff n)
+ (+ (elf-phoff header) (* n (elf-phentsize header))))
+ (define (shoff n)
+ (+ (elf-shoff header) (* n (elf-shentsize header))))
+ (let ((endianness (elf-byte-order header))
+ (word-size (elf-word-size header))
+ (bv (make-bytevector (shoff (elf-shnum header)) 0)))
+ (write-elf-header bv header)
+ (for-each
+ (lambda (segment)
+ (write-elf-program-header bv (phoff (elf-segment-index segment))
+ endianness word-size segment))
+ segments)
+ (for-each
+ (lambda (object)
+ (let ((section (linker-object-section object)))
+ (write-elf-section-header bv (shoff (elf-section-index section))
+ endianness word-size section))
+ (write-linker-object bv object symtab endianness))
+ objects)
+ bv))
+
+(define (check-section-numbers objects)
+ (let* ((nsections (1+ (length objects))) ; 1+ for initial NULL section.
+ (sections (make-vector nsections #f)))
+ (for-each (lambda (object)
+ (let ((n (elf-section-index (linker-object-section object))))
+ (cond
+ ((< n 1)
+ (error "Invalid section number" object))
+ ((>= n nsections)
+ (error "Invalid section number" object))
+ ((vector-ref sections n)
+ (error "Duplicate section" (vector-ref sections n) object))
+ (else
+ (vector-set! sections n object)))))
+ objects)))
;; Given a list of section-header/bytevector pairs, collate the sections
;; into segments, allocate the segments, allocate the ELF bytevector,
@@ -379,64 +449,7 @@
(page-aligned? #t)
(endianness (target-endianness))
(word-size (target-word-size)))
- (let* ((seglists (collate-objects-into-segments objects))
- (sections-by-name (compute-sections-by-name seglists))
- (nsegments (length seglists))
- (nsections (1+ (length objects))) ;; 1+ for the first reserved entry.
- (program-headers-offset (elf-header-len word-size))
- (fileaddr (+ program-headers-offset
- (* nsegments (elf-program-header-len word-size))))
- (memaddr 0))
- (receive (out fileend memend symtab _)
- (fold5
- (lambda (x out fileaddr memaddr symtab prev-flags)
- (let ((type (caar x))
- (flags (cdar x))
- (objects (cdr x)))
- (receive (segment objects symtab)
- (alloc-segment type flags objects fileaddr memaddr symtab
- (if (and page-aligned?
- (not (= flags prev-flags)))
- *page-size*
- 8))
- (values
- (cons (cons segment objects) out)
- (+ (elf-segment-offset segment) (elf-segment-filesz segment))
- (if (zero? (elf-segment-memsz segment))
- memaddr
- (+ (elf-segment-vaddr segment)
- (elf-segment-memsz segment)))
- symtab
- flags))))
- seglists '() fileaddr memaddr vlist-null 0)
- (let* ((out (reverse! out))
- (section-table-offset (+ (align fileend word-size)))
- (fileend (+ section-table-offset
- (* nsections (elf-section-header-len word-size))))
- (bv (make-bytevector fileend 0)))
- (write-elf-header bv #:byte-order endianness #:word-size word-size
- #:phoff program-headers-offset #:phnum nsegments
- #:shoff section-table-offset #:shnum nsections
- #:shstrndx (or (assoc-ref sections-by-name ".shstrtab")
- SHN_UNDEF))
- (write-elf-section-header bv section-table-offset
- endianness word-size
- (make-elf-section #:type SHT_NULL #:flags 0
- #:addralign 0))
- (fold2 (lambda (x phidx shidx)
- (write-elf-program-header
- bv (+ program-headers-offset
- (* (elf-program-header-len word-size) phidx))
- endianness word-size (car x))
- (values
- (1+ phidx)
- (fold1 (lambda (o shidx)
- (write-linker-object bv o symtab endianness)
- (write-elf-section-header
- bv (+ section-table-offset
- (* (elf-section-header-len word-size) shidx))
- endianness word-size (linker-object-section o))
- (1+ shidx))
- (cdr x) shidx)))
- out 0 1)
- bv))))
+ (check-section-numbers objects)
+ (receive (header segments objects symtab)
+ (allocate-elf objects page-aligned? endianness word-size)
+ (write-elf header segments objects symtab)))
diff --git a/test-suite/tests/linker.test b/test-suite/tests/linker.test
index 7ea2631..97f7912 100644
--- a/test-suite/tests/linker.test
+++ b/test-suite/tests/linker.test
@@ -31,9 +31,10 @@
(lambda (table idx)
(set! string-table table)
idx)))
- (define (make-object name bv relocs . kwargs)
+ (define (make-object index name bv relocs . kwargs)
(let ((name-idx (intern-string! (symbol->string name))))
(make-linker-object (apply make-elf-section
+ #:index index
#:name name-idx
#:size (bytevector-length bv)
kwargs)
@@ -41,11 +42,11 @@
(list (make-linker-symbol name 0)))))
(define (make-string-table)
(intern-string! ".shstrtab")
- (make-object '.shstrtab (link-string-table string-table) '()
+ (make-object 2 '.shstrtab (link-string-table string-table) '()
#:type SHT_STRTAB #:flags 0))
(let* ((word-size (target-word-size))
(endianness (target-endianness))
- (sec (make-object name bytes '()))
+ (sec (make-object 1 name bytes '()))
;; This needs to be linked last, because linking other
;; sections adds entries to the string table.
(shstrtab (make-string-table)))
--
1.7.10.4
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH 2/6] ELF refactor and consequent linker simplifications
2013-05-18 15:05 ` [PATCH 2/6] ELF refactor and consequent linker simplifications Andy Wingo
@ 2013-05-22 20:44 ` Ludovic Courtès
2013-05-23 8:47 ` Andy Wingo
0 siblings, 1 reply; 24+ messages in thread
From: Ludovic Courtès @ 2013-05-22 20:44 UTC (permalink / raw)
To: guile-devel
Andy Wingo <wingo@pobox.com> skribis:
> * module/system/vm/elf.scm: Add commentary.
> (make-elf): Add a constructor similar to make-elf-segment and
> make-elf-section.
> (write-elf32-header, write-elf64-header, write-elf-header): Take an
> <elf> instead of all the fields separately.
> (<elf-segment>, <elf-section>): Add "index" property. Adapt
> constructors accordingly.
>
> * module/language/objcode/elf.scm (bytecode->elf): Arrange to set the
> section indexes when creating ELF sections.
>
> * module/system/vm/linker.scm (alloc-segment, relocate-section-header):
> Arrange to set segment and section indexes.
> (find-shstrndx): New helper, replaces compute-sections-by-name. Now
> that sections know their indexes, this is easier.
> (allocate-elf, write-elf): New helpers, factored out of link-elf.
> Easier now that sections have indexes.
> (link-elf): Simplify. Check that the incoming objects have sensible
> numbers.
>
> * test-suite/tests/linker.test: Update to set #:index on the linker
> objects.
Looks good.
> -(define (fold2 proc ls s0 s1)
> - (let lp ((ls ls) (s0 s0) (s1 s1))
> - (if (null? ls)
> - (values s0 s1)
> - (receive (s0 s1) (proc (car ls) s0 s1)
> - (lp (cdr ls) s0 s1)))))
> -
> (define (fold4 proc ls s0 s1 s2 s3)
> (let lp ((ls ls) (s0 s0) (s1 s1) (s2 s2) (s3 s3))
> (if (null? ls)
> @@ -236,15 +231,9 @@
> (receive (s0 s1 s2 s3) (proc (car ls) s0 s1 s2 s3)
> (lp (cdr ls) s0 s1 s2 s3)))))
>
> -(define (fold5 proc ls s0 s1 s2 s3 s4)
> - (let lp ((ls ls) (s0 s0) (s1 s1) (s2 s2) (s3 s3) (s4 s4))
> - (if (null? ls)
> - (values s0 s1 s2 s3 s4)
> - (receive (s0 s1 s2 s3 s4) (proc (car ls) s0 s1 s2 s3 s4)
> - (lp (cdr ls) s0 s1 s2 s3 s4)))))
What about moving these to a helper module eventually?
Ludo’.
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/6] ELF refactor and consequent linker simplifications
2013-05-22 20:44 ` Ludovic Courtès
@ 2013-05-23 8:47 ` Andy Wingo
2013-05-23 10:27 ` Ludovic Courtès
0 siblings, 1 reply; 24+ messages in thread
From: Andy Wingo @ 2013-05-23 8:47 UTC (permalink / raw)
To: Ludovic Courtès; +Cc: guile-devel
On Wed 22 May 2013 22:44, ludo@gnu.org (Ludovic Courtès) writes:
>> -(define (fold2 proc ls s0 s1)
>> - (let lp ((ls ls) (s0 s0) (s1 s1))
>> - (if (null? ls)
>> - (values s0 s1)
>> - (receive (s0 s1) (proc (car ls) s0 s1)
>> - (lp (cdr ls) s0 s1)))))
>> -
>> (define (fold4 proc ls s0 s1 s2 s3)
>> (let lp ((ls ls) (s0 s0) (s1 s1) (s2 s2) (s3 s3))
>> (if (null? ls)
>> @@ -236,15 +231,9 @@
>> (receive (s0 s1 s2 s3) (proc (car ls) s0 s1 s2 s3)
>> (lp (cdr ls) s0 s1 s2 s3)))))
>>
>> -(define (fold5 proc ls s0 s1 s2 s3 s4)
>> - (let lp ((ls ls) (s0 s0) (s1 s1) (s2 s2) (s3 s3) (s4 s4))
>> - (if (null? ls)
>> - (values s0 s1 s2 s3 s4)
>> - (receive (s0 s1 s2 s3 s4) (proc (car ls) s0 s1 s2 s3 s4)
>> - (lp (cdr ls) s0 s1 s2 s3 s4)))))
>
> What about moving these to a helper module eventually?
Sure. Or maybe a fold-values that takes a variable number of arguments and
inlines itself...
(define-syntax fold-values
(lambda (x)
(syntax-case x ()
((_ proc list seed ...)
(with-syntax (((s ...) (generate-temporaries #'(seed ...))))
#'(let ((p proc))
(let lp ((l list) (s seed) ...)
(match l
(() (values s ...))
((elt . l)
(call-with-values (lambda () (p elt s ...))
(lambda (s ...)
(lp l s ...))))))))))))
I'll do that :)
A
--
http://wingolog.org/
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 2/6] ELF refactor and consequent linker simplifications
2013-05-23 8:47 ` Andy Wingo
@ 2013-05-23 10:27 ` Ludovic Courtès
0 siblings, 0 replies; 24+ messages in thread
From: Ludovic Courtès @ 2013-05-23 10:27 UTC (permalink / raw)
To: Andy Wingo; +Cc: guile-devel
Andy Wingo <wingo@pobox.com> skribis:
> On Wed 22 May 2013 22:44, ludo@gnu.org (Ludovic Courtès) writes:
>
>>> -(define (fold2 proc ls s0 s1)
>>> - (let lp ((ls ls) (s0 s0) (s1 s1))
>>> - (if (null? ls)
>>> - (values s0 s1)
>>> - (receive (s0 s1) (proc (car ls) s0 s1)
>>> - (lp (cdr ls) s0 s1)))))
>>> -
>>> (define (fold4 proc ls s0 s1 s2 s3)
>>> (let lp ((ls ls) (s0 s0) (s1 s1) (s2 s2) (s3 s3))
>>> (if (null? ls)
>>> @@ -236,15 +231,9 @@
>>> (receive (s0 s1 s2 s3) (proc (car ls) s0 s1 s2 s3)
>>> (lp (cdr ls) s0 s1 s2 s3)))))
>>>
>>> -(define (fold5 proc ls s0 s1 s2 s3 s4)
>>> - (let lp ((ls ls) (s0 s0) (s1 s1) (s2 s2) (s3 s3) (s4 s4))
>>> - (if (null? ls)
>>> - (values s0 s1 s2 s3 s4)
>>> - (receive (s0 s1 s2 s3 s4) (proc (car ls) s0 s1 s2 s3 s4)
>>> - (lp (cdr ls) s0 s1 s2 s3 s4)))))
>>
>> What about moving these to a helper module eventually?
>
> Sure. Or maybe a fold-values that takes a variable number of arguments and
> inlines itself...
>
> (define-syntax fold-values
> (lambda (x)
> (syntax-case x ()
> ((_ proc list seed ...)
> (with-syntax (((s ...) (generate-temporaries #'(seed ...))))
> #'(let ((p proc))
> (let lp ((l list) (s seed) ...)
> (match l
> (() (values s ...))
> ((elt . l)
> (call-with-values (lambda () (p elt s ...))
> (lambda (s ...)
> (lp l s ...))))))))))))
Even better. :-)
Ludo’.
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH 3/6] elf: add accessors for header members that might need relocation
2013-05-18 15:05 wip-linker: Refactor ELF linker and loader Andy Wingo
2013-05-18 15:05 ` [PATCH 1/6] split linker out of elf module Andy Wingo
2013-05-18 15:05 ` [PATCH 2/6] ELF refactor and consequent linker simplifications Andy Wingo
@ 2013-05-18 15:05 ` Andy Wingo
2013-05-22 20:45 ` Ludovic Courtès
2013-05-18 15:05 ` [PATCH 4/6] refactor linker to lay out ELF files and memory in the same way Andy Wingo
` (3 subsequent siblings)
6 siblings, 1 reply; 24+ messages in thread
From: Andy Wingo @ 2013-05-18 15:05 UTC (permalink / raw)
To: guile-devel; +Cc: Andy Wingo
* module/system/vm/elf.scm (elf-header-shoff-offset)
(elf-section-header-addr-offset, elf-section-header-offset-offset):
New accessors.
---
module/system/vm/elf.scm | 24 ++++++++++++++++++++++--
1 file changed, 22 insertions(+), 2 deletions(-)
diff --git a/module/system/vm/elf.scm b/module/system/vm/elf.scm
index efa9782..b59970c 100644
--- a/module/system/vm/elf.scm
+++ b/module/system/vm/elf.scm
@@ -47,7 +47,8 @@
elf-entry elf-phoff elf-shoff elf-flags elf-ehsize
elf-phentsize elf-phnum elf-shentsize elf-shnum elf-shstrndx
- elf-header-len write-elf-header
+ elf-header-len elf-header-shoff-offset
+ write-elf-header
(make-elf-segment* . make-elf-segment)
elf-segment?
@@ -72,7 +73,9 @@
elf-section-link elf-section-info elf-section-addralign
elf-section-entsize
- elf-section-header-len write-elf-section-header
+ elf-section-header-len elf-section-header-addr-offset
+ elf-section-header-offset-offset
+ write-elf-section-header
make-elf-symbol elf-symbol?
elf-symbol-name elf-symbol-value elf-symbol-size
@@ -149,6 +152,11 @@
((4) elf32-header-len)
((8) elf64-header-len)
(else (error "invalid word size" word-size))))
+(define (elf-header-shoff-offset word-size)
+ (case word-size
+ ((4) 32)
+ ((8) 40)
+ (else (error "bad word size" word-size))))
(define ELFCLASS32 1) ; 32-bit objects
(define ELFCLASS64 2) ; 64-bit objects
@@ -621,6 +629,18 @@
((8) 64)
(else (error "bad word size" word-size))))
+(define (elf-section-header-addr-offset word-size)
+ (case word-size
+ ((4) 12)
+ ((8) 16)
+ (else (error "bad word size" word-size))))
+
+(define (elf-section-header-offset-offset word-size)
+ (case word-size
+ ((4) 16)
+ ((8) 24)
+ (else (error "bad word size" word-size))))
+
(define (parse-elf64-section-header index bv offset byte-order)
(if (<= (+ offset 64) (bytevector-length bv))
(make-elf-section index
--
1.7.10.4
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PATCH 4/6] refactor linker to lay out ELF files and memory in the same way
2013-05-18 15:05 wip-linker: Refactor ELF linker and loader Andy Wingo
` (2 preceding siblings ...)
2013-05-18 15:05 ` [PATCH 3/6] elf: add accessors for header members that might need relocation Andy Wingo
@ 2013-05-18 15:05 ` Andy Wingo
2013-05-22 20:56 ` Ludovic Courtès
2013-05-18 15:05 ` [PATCH 5/6] refactor and simplify ELF loader in objcodes.c Andy Wingo
` (2 subsequent siblings)
6 siblings, 1 reply; 24+ messages in thread
From: Andy Wingo @ 2013-05-18 15:05 UTC (permalink / raw)
To: guile-devel; +Cc: Andy Wingo
* module/system/vm/linker.scm (make-linker-object):
(linker-object-section-symbol):
(linker-object-symbols*): Create a symbol to the start of a linker
object. Hide it from the external linker-object-symbols* accessor.
(segment-kind, count-segments): Sections without SHF_ALLOC don't get
segments.
(collate-objects-into-segments): Allow for #f segment types. If two
sections have the same type and flags, leave them in the same order.
(align): Allow for 0 alignment.
(add-elf-objects): New helper: puts the ELF data structures (header,
segment table, and section table) in sections of their own. This
lends a nice clarity and conceptual unity to the linker.
(relocate-section-header, alloc-objects): Lay out segments with
congruent, contiguous addresses, so that we can just mmap the file and
if debugging sections that are not in segments are present, they can
be lazily paged in if needed by the kernel's VM system.
(link-elf): Refactor to use the new interfaces.
* test-suite/tests/linker.test: Update to expect the additional sections
for the header and section table.
---
module/system/vm/linker.scm | 331 +++++++++++++++++++++++++++---------------
test-suite/tests/linker.test | 5 +-
2 files changed, 218 insertions(+), 118 deletions(-)
diff --git a/module/system/vm/linker.scm b/module/system/vm/linker.scm
index 580981a..2baddb0 100644
--- a/module/system/vm/linker.scm
+++ b/module/system/vm/linker.scm
@@ -82,7 +82,7 @@
linker-object-section
linker-object-bv
linker-object-relocs
- linker-object-symbols
+ (linker-object-symbols* . linker-object-symbols)
make-string-table
string-table-intern
@@ -120,13 +120,23 @@
(address linker-symbol-address))
(define-record-type <linker-object>
- (make-linker-object section bv relocs symbols)
+ (%make-linker-object section bv relocs symbols)
linker-object?
(section linker-object-section)
(bv linker-object-bv)
(relocs linker-object-relocs)
(symbols linker-object-symbols))
+;; Hide a symbol to the beginning of the section in the symbols.
+(define (make-linker-object section bv relocs symbols)
+ (%make-linker-object section bv relocs
+ (cons (make-linker-symbol (gensym "*section*") 0)
+ symbols)))
+(define (linker-object-section-symbol object)
+ (car (linker-object-symbols object)))
+(define (linker-object-symbols* object)
+ (cdr (linker-object-symbols object)))
+
(define (make-string-table)
'(("" 0 #vu8())))
@@ -159,7 +169,8 @@
(let ((flags (elf-section-flags section)))
(cons (cond
((= (elf-section-type section) SHT_DYNAMIC) PT_DYNAMIC)
- ((zero? (logand SHF_ALLOC flags)) PT_NOTE)
+ ;; Sections without SHF_ALLOC don't go in segments.
+ ((zero? flags) #f)
(else PT_LOAD))
(logior (if (zero? (logand SHF_ALLOC flags))
0
@@ -171,6 +182,18 @@
0
PF_W)))))
+(define (count-segments objects)
+ (length
+ (fold1 (lambda (object kinds)
+ (let ((kind (segment-kind (linker-object-section object))))
+ (if (and (car kind) (not (member kind kinds)))
+ (cons kind kinds)
+ kinds)))
+ objects
+ ;; We know there will be at least one segment, containing at
+ ;; least the header and segment table.
+ (list (cons PT_LOAD PF_R)))))
+
(define (group-by-cars ls)
(let lp ((in ls) (k #f) (group #f) (out '()))
(cond
@@ -194,15 +217,22 @@
(cons (segment-kind (linker-object-section o)) o))
objects)
(lambda (x y)
- (let ((x-type (caar x)) (y-type (caar y))
- (x-flags (cdar x)) (y-flags (cdar y))
- (x-section (linker-object-section (cdr x)))
- (y-section (linker-object-section (cdr y))))
+ (let* ((x-kind (car x)) (y-kind (car y))
+ (x-type (car x-kind)) (y-type (car y-kind))
+ (x-flags (cdr x-kind)) (y-flags (cdr y-kind))
+ (x-section (linker-object-section (cdr x)))
+ (y-section (linker-object-section (cdr y))))
(cond
- ((not (equal? x-flags y-flags))
- (< x-flags y-flags))
- ((not (equal? x-type y-type))
- (< x-type y-type))
+ ((not (equal? x-kind y-kind))
+ (cond
+ ((and x-type y-type)
+ (cond
+ ((not (equal? x-flags y-flags))
+ (< x-flags y-flags))
+ (else
+ (< x-type y-type))))
+ (else
+ (not y-type))))
((not (equal? (elf-section-type x-section)
(elf-section-type y-section)))
(cond
@@ -211,12 +241,15 @@
(else (< (elf-section-type x-section)
(elf-section-type y-section)))))
(else
- (< (elf-section-size x-section)
- (elf-section-size y-section)))))))))
+ ;; Leave them in the initial order. This allows us to ensure
+ ;; that the ELF header is written first.
+ #f)))))))
(define (align address alignment)
- (+ address
- (modulo (- alignment (modulo address alignment)) alignment)))
+ (if (zero? alignment)
+ address
+ (+ address
+ (modulo (- alignment (modulo address alignment)) alignment))))
(define (fold1 proc ls s0)
(let lp ((ls ls) (s0 s0))
@@ -224,20 +257,20 @@
s0
(lp (cdr ls) (proc (car ls) s0)))))
-(define (fold4 proc ls s0 s1 s2 s3)
- (let lp ((ls ls) (s0 s0) (s1 s1) (s2 s2) (s3 s3))
+(define (fold3 proc ls s0 s1 s2)
+ (let lp ((ls ls) (s0 s0) (s1 s1) (s2 s2))
(if (null? ls)
- (values s0 s1 s2 s3)
- (receive (s0 s1 s2 s3) (proc (car ls) s0 s1 s2 s3)
- (lp (cdr ls) s0 s1 s2 s3)))))
+ (values s0 s1 s2)
+ (receive (s0 s1 s2) (proc (car ls) s0 s1 s2)
+ (lp (cdr ls) s0 s1 s2)))))
-(define (relocate-section-header sec fileaddr memaddr)
+(define (relocate-section-header sec addr)
(make-elf-section #:index (elf-section-index sec)
#:name (elf-section-name sec)
#:type (elf-section-type sec)
#:flags (elf-section-flags sec)
- #:addr memaddr
- #:offset fileaddr
+ #:addr addr
+ #:offset addr
#:size (elf-section-size sec)
#:link (elf-section-link sec)
#:info (elf-section-info sec)
@@ -258,49 +291,39 @@
symbols
symtab))
-(define (alloc-segment phidx type flags objects
- fileaddr memaddr symtab alignment)
- (let* ((loadable? (not (zero? flags)))
- (alignment (fold1 (lambda (o alignment)
+(define (alloc-objects write-segment-header!
+ phidx type flags objects addr symtab alignment)
+ (let* ((alignment (fold1 (lambda (o alignment)
(lcm (elf-section-addralign
(linker-object-section o))
alignment))
objects
alignment))
- (fileaddr (align fileaddr alignment))
- (memaddr (align memaddr alignment)))
- (receive (objects fileend memend symtab)
- (fold4 (lambda (o out fileaddr memaddr symtab)
+ (addr (align addr alignment)))
+ (receive (objects endaddr symtab)
+ (fold3 (lambda (o out addr symtab)
(let* ((section (linker-object-section o))
- (fileaddr
- (if (= (elf-section-type section) SHT_NOBITS)
- fileaddr
- (align fileaddr (elf-section-addralign section))))
- (memaddr
- (align memaddr (elf-section-addralign section))))
+ (addr (align addr (elf-section-addralign section))))
(values
(cons (make-linker-object
- (relocate-section-header section fileaddr
- memaddr)
+ (relocate-section-header section addr)
(linker-object-bv o)
(linker-object-relocs o)
(linker-object-symbols o))
out)
- (if (= (elf-section-type section) SHT_NOBITS)
- fileaddr
- (+ fileaddr (elf-section-size section)))
- (+ memaddr (elf-section-size section))
- (add-symbols (linker-object-symbols o) memaddr symtab))))
- objects '() fileaddr memaddr symtab)
- (values
- (make-elf-segment #:index phidx
- #:type type #:offset fileaddr
- #:vaddr (if loadable? memaddr 0)
- #:filesz (- fileend fileaddr)
- #:memsz (if loadable? (- memend memaddr) 0)
- #:flags flags #:align alignment)
- (reverse objects)
- symtab))))
+ (+ addr (elf-section-size section))
+ (add-symbols (linker-object-symbols o) addr symtab))))
+ objects
+ '() addr symtab)
+ (when type
+ (write-segment-header!
+ (make-elf-segment #:index phidx #:type type
+ #:offset addr #:vaddr addr
+ #:filesz (- endaddr addr) #:memsz (- endaddr addr)
+ #:flags flags #:align alignment)))
+ (values endaddr
+ (reverse objects)
+ symtab))))
(define (process-reloc reloc bv file-offset mem-offset symtab endianness)
(let ((ent (vhash-assq (linker-reloc-symbol reloc) symtab)))
@@ -351,79 +374,150 @@
(elf-section-index section))))
objects))
+(define (add-elf-objects objects endianness word-size)
+ (define phoff (elf-header-len word-size))
+ (define phentsize (elf-program-header-len word-size))
+ (define shentsize (elf-section-header-len word-size))
+ (define shnum (+ (length objects) 3))
+ (define reloc-kind
+ (case word-size
+ ((4) 'abs32/1)
+ ((8) 'abs64/1)
+ (else (error "bad word size" word-size))))
+
+ ;; ELF requires that the first entry in the section table be of type
+ ;; SHT_NULL.
+ ;;
+ (define (make-null-section)
+ (make-linker-object (make-elf-section #:index 0 #:type SHT_NULL
+ #:flags 0 #:addralign 0)
+ #vu8() '() '()))
+
+ ;; The ELF header and the segment table.
+ ;;
+ (define (make-header phnum index shoff-label)
+ (let* ((header (make-elf #:byte-order endianness #:word-size word-size
+ #:phoff phoff #:phnum phnum #:phentsize phentsize
+ #:shoff 0 #:shnum shnum #:shentsize shentsize
+ #:shstrndx (or (find-shstrndx objects) SHN_UNDEF)))
+ (shoff-reloc (make-linker-reloc reloc-kind
+ (elf-header-shoff-offset word-size)
+ 0
+ shoff-label))
+ (size (+ phoff (* phnum phentsize)))
+ (bv (make-bytevector size 0)))
+ (write-elf-header bv header)
+ ;; Leave the segment table uninitialized; it will be filled in
+ ;; later by calls to the write-segment-header! closure.
+ (make-linker-object (make-elf-section #:index index #:type SHT_PROGBITS
+ #:flags SHF_ALLOC #:size size)
+ bv
+ (list shoff-reloc)
+ '())))
+
+ ;; The section table.
+ ;;
+ (define (make-footer objects shoff-label)
+ (let* ((size (* shentsize shnum))
+ (bv (make-bytevector size 0))
+ (section-table (make-elf-section #:index (length objects)
+ #:type SHT_PROGBITS
+ #:flags 0
+ #:size size)))
+ (define (write-and-reloc section-label section relocs)
+ (let ((offset (* shentsize (elf-section-index section))))
+ (write-elf-section-header bv offset endianness word-size section)
+ (if (= (elf-section-type section) SHT_NULL)
+ relocs
+ (cons* (make-linker-reloc
+ reloc-kind
+ (+ offset (elf-section-header-addr-offset word-size))
+ 0
+ section-label)
+ (make-linker-reloc
+ reloc-kind
+ (+ offset (elf-section-header-offset-offset word-size))
+ 0
+ section-label)
+ relocs))))
+ (let ((relocs (fold1 (lambda (object relocs)
+ (write-and-reloc
+ (linker-symbol-name
+ (linker-object-section-symbol object))
+ (linker-object-section object)
+ relocs))
+ objects
+ (write-and-reloc shoff-label section-table '()))))
+ (%make-linker-object section-table bv relocs
+ (list (make-linker-symbol shoff-label 0))))))
+
+ (let* ((null-section (make-null-section))
+ (objects (cons null-section objects))
+
+ (shoff (gensym "*section-table*"))
+ (header (make-header (count-segments objects) (length objects) shoff))
+ (objects (cons header objects))
+
+ (footer (make-footer objects shoff))
+ (objects (cons footer objects)))
+
+ ;; The header includes the segment table, which needs offsets and
+ ;; sizes of the segments. Normally we would use relocs to rewrite
+ ;; these values, but there is no reloc type that would allow us to
+ ;; compute size. Such a reloc would need to take the difference
+ ;; between two symbols, and it's probably a bad idea architecturally
+ ;; to create one.
+ ;;
+ ;; So instead we return a closure to patch up the segment table.
+ ;; Normally we'd shy away from such destructive interfaces, but it's
+ ;; OK as we create the header section ourselves.
+ ;;
+ (define (write-segment-header! segment)
+ (let ((bv (linker-object-bv header))
+ (offset (+ phoff (* (elf-segment-index segment) phentsize))))
+ (write-elf-program-header bv offset endianness word-size segment)))
+
+ (values write-segment-header! objects)))
+
;; objects ::= list of <linker-object>
-;; => 3 values: ELF header, program headers, objects
+;;
+;; => 3 values:
+;; file size
+;; objects with allocated memory address and file offset
+;; symbol table
+;;
(define (allocate-elf objects page-aligned? endianness word-size)
- (let* ((seglists (collate-objects-into-segments objects))
- (nsegments (length seglists))
- (nsections (1+ (length objects))) ;; 1+ for the first reserved entry.
- (program-headers-offset (elf-header-len word-size))
- (fileaddr (+ program-headers-offset
- (* nsegments (elf-program-header-len word-size))))
- (memaddr 0))
- (let lp ((seglists seglists)
- (segments '())
+ (receive (write-segment-header! objects)
+ (add-elf-objects objects endianness word-size)
+ (let lp ((seglists (collate-objects-into-segments objects))
(objects '())
(phidx 0)
- (fileaddr fileaddr)
- (memaddr memaddr)
+ (addr 0)
(symtab vlist-null)
(prev-flags 0))
(match seglists
((((type . flags) objs-in ...) seglists ...)
- (receive (segment objs-out symtab)
- (alloc-segment phidx type flags objs-in fileaddr memaddr symtab
+ (receive (addr objs-out symtab)
+ (alloc-objects write-segment-header!
+ phidx type flags objs-in addr symtab
(if (and page-aligned?
- (not (= flags prev-flags)))
+ (not (= flags prev-flags))
+ ;; Allow sections that are not in
+ ;; loadable segments to share pages
+ ;; with PF_R segments.
+ (not (and (not type) (= PF_R prev-flags))))
*page-size*
8))
(lp seglists
- (cons segment segments)
(fold1 cons objs-out objects)
- (1+ phidx)
- (+ (elf-segment-offset segment) (elf-segment-filesz segment))
- (if (zero? (elf-segment-memsz segment))
- memaddr
- (+ (elf-segment-vaddr segment)
- (elf-segment-memsz segment)))
+ (if type (1+ phidx) phidx)
+ addr
symtab
flags)))
(()
- (let ((section-table-offset (+ (align fileaddr word-size))))
- (values
- (make-elf #:byte-order endianness #:word-size word-size
- #:phoff program-headers-offset #:phnum nsegments
- #:shoff section-table-offset #:shnum nsections
- #:shstrndx (or (find-shstrndx objects) SHN_UNDEF))
- (reverse segments)
- (let ((null-section (make-elf-section #:index 0 #:type SHT_NULL
- #:flags 0 #:addralign 0)))
- (cons (make-linker-object null-section #vu8() '() '())
- (reverse objects)))
- symtab)))))))
-
-(define (write-elf header segments objects symtab)
- (define (phoff n)
- (+ (elf-phoff header) (* n (elf-phentsize header))))
- (define (shoff n)
- (+ (elf-shoff header) (* n (elf-shentsize header))))
- (let ((endianness (elf-byte-order header))
- (word-size (elf-word-size header))
- (bv (make-bytevector (shoff (elf-shnum header)) 0)))
- (write-elf-header bv header)
- (for-each
- (lambda (segment)
- (write-elf-program-header bv (phoff (elf-segment-index segment))
- endianness word-size segment))
- segments)
- (for-each
- (lambda (object)
- (let ((section (linker-object-section object)))
- (write-elf-section-header bv (shoff (elf-section-index section))
- endianness word-size section))
- (write-linker-object bv object symtab endianness))
- objects)
- bv))
+ (values addr
+ (reverse objects)
+ symtab))))))
(define (check-section-numbers objects)
(let* ((nsections (1+ (length objects))) ; 1+ for initial NULL section.
@@ -441,15 +535,20 @@
(vector-set! sections n object)))))
objects)))
-;; Given a list of section-header/bytevector pairs, collate the sections
-;; into segments, allocate the segments, allocate the ELF bytevector,
-;; and write the segments into the bytevector, relocating as we go.
+;; Given a list of linker objects, collate the objects into segments,
+;; allocate the segments, allocate the ELF bytevector, and write the
+;; segments into the bytevector, relocating as we go.
;;
(define* (link-elf objects #:key
(page-aligned? #t)
(endianness (target-endianness))
(word-size (target-word-size)))
(check-section-numbers objects)
- (receive (header segments objects symtab)
+ (receive (size objects symtab)
(allocate-elf objects page-aligned? endianness word-size)
- (write-elf header segments objects symtab)))
+ (let ((bv (make-bytevector size 0)))
+ (for-each
+ (lambda (object)
+ (write-linker-object bv object symtab endianness))
+ objects)
+ bv)))
diff --git a/test-suite/tests/linker.test b/test-suite/tests/linker.test
index 97f7912..9e63991 100644
--- a/test-suite/tests/linker.test
+++ b/test-suite/tests/linker.test
@@ -75,8 +75,9 @@
(set! elf (parse-elf bytes))
(elf? elf)))
- ;; 3 sections: the initial NULL section, .foo, and .shstrtab.
- (pass-if-equal 3 (elf-shnum elf))
+ ;; 5 sections: the initial NULL section, .foo, .shstrtab, the initial
+ ;; header with segment table, and the section table.
+ (pass-if-equal 5 (elf-shnum elf))
(pass-if ".foo section checks out"
(let ((sec (assoc-ref (elf-sections-by-name elf) ".foo")))
--
1.7.10.4
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH 4/6] refactor linker to lay out ELF files and memory in the same way
2013-05-18 15:05 ` [PATCH 4/6] refactor linker to lay out ELF files and memory in the same way Andy Wingo
@ 2013-05-22 20:56 ` Ludovic Courtès
2013-05-23 10:46 ` Andy Wingo
0 siblings, 1 reply; 24+ messages in thread
From: Ludovic Courtès @ 2013-05-22 20:56 UTC (permalink / raw)
To: guile-devel
This goes beyond my ELF capabilities, so can’t really comment on this.
Style comments:
Andy Wingo <wingo@pobox.com> skribis:
> * module/system/vm/linker.scm (make-linker-object):
> (linker-object-section-symbol):
> (linker-object-symbols*): Create a symbol to the start of a linker
> object. Hide it from the external linker-object-symbols* accessor.
>
> (segment-kind, count-segments): Sections without SHF_ALLOC don't get
> segments.
> (collate-objects-into-segments): Allow for #f segment types. If two
> sections have the same type and flags, leave them in the same order.
>
> (align): Allow for 0 alignment.
>
> (add-elf-objects): New helper: puts the ELF data structures (header,
> segment table, and section table) in sections of their own. This
> lends a nice clarity and conceptual unity to the linker.
Please move the descriptions as docstrings.
> (relocate-section-header, alloc-objects): Lay out segments with
> congruent, contiguous addresses, so that we can just mmap the file and
> if debugging sections that are not in segments are present, they can
> be lazily paged in if needed by the kernel's VM system.
Nice. Could you move this comment in the code?
> ;; objects ::= list of <linker-object>
> -;; => 3 values: ELF header, program headers, objects
> +;;
> +;; => 3 values:
> +;; file size
> +;; objects with allocated memory address and file offset
> +;; symbol table
> +;;
> (define (allocate-elf objects page-aligned? endianness word-size)
What does it do?
Ludo’.
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 4/6] refactor linker to lay out ELF files and memory in the same way
2013-05-22 20:56 ` Ludovic Courtès
@ 2013-05-23 10:46 ` Andy Wingo
2013-05-23 11:59 ` Ludovic Courtès
0 siblings, 1 reply; 24+ messages in thread
From: Andy Wingo @ 2013-05-23 10:46 UTC (permalink / raw)
To: Ludovic Courtès; +Cc: guile-devel
[-- Attachment #1: Type: text/plain, Size: 351 bytes --]
On Wed 22 May 2013 22:56, ludo@gnu.org (Ludovic Courtès) writes:
> Please move the descriptions as docstrings.
Done. I've added docstrings for this file in the patch that I'll push,
and taken the opportunity to simplify some other things. Attaching a
copy of the linker; it's easier to read as a file than as a patch.
Cheers,
Andy
[-- Attachment #2: (system vm linker) --]
[-- Type: text/plain, Size: 27170 bytes --]
;;; Guile ELF linker
;; Copyright (C) 2011, 2012, 2013 Free Software Foundation, Inc.
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
;;;; License as published by the Free Software Foundation; either
;;;; version 3 of the License, or (at your option) any later version.
;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;;; Lesser General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU Lesser General Public
;;;; License along with this library; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;;; Commentary:
;;;
;;; A linker combines several linker objects into an executable or a
;;; loadable library.
;;;
;;; There are several common formats for libraries out there. Since
;;; Guile includes its own linker and loader, we are free to choose any
;;; format, or make up our own.
;;;
;;; There are essentially two requirements for a linker format:
;;; libraries should be able to be loaded with the minimal amount of
;;; work; and they should support introspection in some way, in order to
;;; enable good debugging.
;;;
;;; These requirements are somewhat at odds, as loading should not have
;;; to stumble over features related to introspection. It so happens
;;; that a lot of smart people have thought about this situation, and
;;; the ELF format embodies the outcome of their thinking. Guile uses
;;; ELF as its format, regardless of the platform's native library
;;; format. It's not inconceivable that Guile could interoperate with
;;; the native dynamic loader at some point, but it's not a near-term
;;; goal.
;;;
;;; Guile's linker takes a list of objects, sorts them according to
;;; similarity from the perspective of the loader, then writes them out
;;; into one big bytevector in ELF format.
;;;
;;; It is often the case that different parts of a library need to refer
;;; to each other. For example, program text may need to refer to a
;;; constant from writable memory. When the linker places sections
;;; (linker objects) into specific locations in the linked bytevector,
;;; it needs to fix up those references. This process is called
;;; /relocation/. References needing relocations are recorded in
;;; "linker-reloc" objects, and collected in a list in each
;;; "linker-object". The actual definitions of the references are
;;; stored in "linker-symbol" objects, also collected in a list in each
;;; "linker-object".
;;;
;;; By default, the ELF files created by the linker include some padding
;;; so that different parts of the file can be loaded in with different
;;; permissions. For example, some parts of the file are read-only and
;;; thus can be shared between processes. Some parts of the file don't
;;; need to be loaded at all. However this padding can be too much for
;;; interactive compilation, when the code is never written out to disk;
;;; in that case, pass #:page-aligned? #f to `link-elf'.
;;;
;;; Code:
(define-module (system vm linker)
#:use-module (rnrs bytevectors)
#:use-module (system foreign)
#:use-module (system base target)
#:use-module ((srfi srfi-1) #:select (append-map))
#:use-module (srfi srfi-9)
#:use-module (ice-9 receive)
#:use-module (ice-9 vlist)
#:use-module (ice-9 match)
#:use-module (system vm elf)
#:export (make-linker-reloc
make-linker-symbol
make-linker-object
linker-object?
linker-object-section
linker-object-bv
linker-object-relocs
(linker-object-symbols* . linker-object-symbols)
make-string-table
string-table-intern
link-string-table
link-elf))
(define-syntax fold-values
(lambda (x)
(syntax-case x ()
((_ proc list seed ...)
(with-syntax (((s ...) (generate-temporaries #'(seed ...))))
#'(let ((p proc))
(let lp ((l list) (s seed) ...)
(match l
(() (values s ...))
((elt . l)
(call-with-values (lambda () (p elt s ...))
(lambda (s ...) (lp l s ...))))))))))))
;; A relocation records a reference to a symbol. When the symbol is
;; resolved to an address, the reloc location will be updated to point
;; to the address.
;;
;; Two types. Abs32/1 and Abs64/1 are absolute offsets in bytes.
;; Rel32/4 is a relative signed offset in 32-bit units. Either can have
;; an arbitrary addend as well.
;;
(define-record-type <linker-reloc>
(make-linker-reloc type loc addend symbol)
linker-reloc?
(type linker-reloc-type) ;; rel32/4, abs32/1, abs64/1
(loc linker-reloc-loc)
(addend linker-reloc-addend)
(symbol linker-reloc-symbol))
;; A symbol is an association between a name and an address. The
;; address is always in regard to some particular address space. When
;; objects come into the linker, their symbols live in the object
;; address space. When the objects are allocated into ELF segments, the
;; symbols will be relocated into memory address space, corresponding to
;; the position the ELF will be loaded at.
;;
(define-record-type <linker-symbol>
(make-linker-symbol name address)
linker-symbol?
(name linker-symbol-name)
(address linker-symbol-address))
(define-record-type <linker-object>
(%make-linker-object section bv relocs symbols)
linker-object?
(section linker-object-section)
(bv linker-object-bv)
(relocs linker-object-relocs)
(symbols linker-object-symbols))
(define (make-linker-object section bv relocs symbols)
"Create a linker object with the @code{<elf-section>} header
@var{section}, bytevector contents @var{bv}, list of linker relocations
@var{relocs}, and list of linker symbols @var{symbols}."
(%make-linker-object section bv relocs
;; Hide a symbol to the beginning of the section
;; in the symbols.
(cons (make-linker-symbol (gensym "*section*") 0)
symbols)))
(define (linker-object-section-symbol object)
"Return the linker symbol corresponding to the start of this section."
(car (linker-object-symbols object)))
(define (linker-object-symbols* object)
"Return the linker symbols defined by the user for this this section."
(cdr (linker-object-symbols object)))
(define (make-string-table)
"Return a functional string table with one entry: the empty string."
'(("" 0 #vu8())))
(define (string-table-length table)
"Return the number of bytes needed for the string table @var{table}."
(let ((last (car table)))
;; The + 1 is for the trailing NUL byte.
(+ (cadr last) (bytevector-length (caddr last)) 1)))
(define (string-table-intern table str)
"Add @var{str} to the string table @var{table}. Yields two values: a
possibly newly allocated string table, and the byte index of the string
in that table."
(cond
((assoc str table)
=> (lambda (ent)
(values table (cadr ent))))
(else
(let* ((next (string-table-length table)))
(values (cons (list str next (string->utf8 str))
table)
next)))))
(define (link-string-table table)
"Link the functional string table @var{table} into a sequence of
bytes, suitable for use as the contents of an ELF string table section."
(let ((out (make-bytevector (string-table-length table) 0)))
(for-each
(lambda (ent)
(let ((bytes (caddr ent)))
(bytevector-copy! bytes 0 out (cadr ent) (bytevector-length bytes))))
table)
out))
(define (segment-kind section)
"Return the type of segment needed to store @var{section}, as a pair.
The car is the @code{PT_} segment type, or @code{#f} if the section
doesn't need to be present in a loadable segment. The cdr is a bitfield
of associated @code{PF_} permissions."
(let ((flags (elf-section-flags section)))
(cons (cond
((= (elf-section-type section) SHT_DYNAMIC) PT_DYNAMIC)
;; Sections without SHF_ALLOC don't go in segments.
((zero? flags) #f)
(else PT_LOAD))
(logior (if (zero? (logand SHF_ALLOC flags))
0
PF_R)
(if (zero? (logand SHF_EXECINSTR flags))
0
PF_X)
(if (zero? (logand SHF_WRITE flags))
0
PF_W)))))
(define (count-segments objects)
"Return the total number of segments needed to represent the linker
objects in @var{objects}, including the segment needed for the ELF
header and segment table."
(length
(fold-values (lambda (object kinds)
(let ((kind (segment-kind (linker-object-section object))))
(if (and (car kind) (not (member kind kinds)))
(cons kind kinds)
kinds)))
objects
;; We know there will be at least one segment,
;; containing at least the header and segment table.
(list (cons PT_LOAD PF_R)))))
(define (group-by-cars ls)
(let lp ((ls ls) (k #f) (group #f) (out '()))
(match ls
(()
(reverse!
(if group
(cons (cons k (reverse! group)) out)
out)))
(((k* . v) . ls)
(if (and group (equal? k k*))
(lp ls k (cons v group) out)
(lp ls k* (list v)
(if group
(cons (cons k (reverse! group)) out)
out)))))))
(define (collate-objects-into-segments objects)
"Given the list of linker objects @var{objects}, group them into
contiguous ELF segments of the same type and flags. The result is an
alist that maps segment types to lists of linker objects. See
@code{segment-type} for a description of segment types. Within a
segment, the order of the linker objects is preserved."
(group-by-cars
(stable-sort!
(map (lambda (o)
(cons (segment-kind (linker-object-section o)) o))
objects)
(lambda (x y)
(let* ((x-kind (car x)) (y-kind (car y))
(x-type (car x-kind)) (y-type (car y-kind))
(x-flags (cdr x-kind)) (y-flags (cdr y-kind))
(x-section (linker-object-section (cdr x)))
(y-section (linker-object-section (cdr y))))
(cond
((not (equal? x-kind y-kind))
(cond
((and x-type y-type)
(cond
((not (equal? x-flags y-flags))
(< x-flags y-flags))
(else
(< x-type y-type))))
(else
(not y-type))))
((not (equal? (elf-section-type x-section)
(elf-section-type y-section)))
(cond
((equal? (elf-section-type x-section) SHT_NOBITS) #t)
((equal? (elf-section-type y-section) SHT_NOBITS) #f)
(else (< (elf-section-type x-section)
(elf-section-type y-section)))))
(else
;; Leave them in the initial order. This allows us to ensure
;; that the ELF header is written first.
#f)))))))
(define (align address alignment)
(if (zero? alignment)
address
(+ address
(modulo (- alignment (modulo address alignment)) alignment))))
(define (relocate-section-header sec addr)
"Return a new section header, just like @var{sec} but with its
@code{addr} and @code{offset} set to @var{addr}."
(make-elf-section #:index (elf-section-index sec)
#:name (elf-section-name sec)
#:type (elf-section-type sec)
#:flags (elf-section-flags sec)
#:addr addr
#:offset addr
#:size (elf-section-size sec)
#:link (elf-section-link sec)
#:info (elf-section-info sec)
#:addralign (elf-section-addralign sec)
#:entsize (elf-section-entsize sec)))
(define *page-size* 4096)
(define (add-symbols symbols offset symtab)
"Add @var{symbols} to the symbol table @var{symtab}, relocating them
from object address space to memory address space. Returns a new symbol
table."
(fold-values
(lambda (symbol symtab)
(let ((name (linker-symbol-name symbol))
(addr (linker-symbol-address symbol)))
(when (vhash-assq name symtab)
(error "duplicate symbol" name))
(vhash-consq name (make-linker-symbol name (+ addr offset)) symtab)))
symbols
symtab))
(define (allocate-segment write-segment-header!
phidx type flags objects addr symtab alignment)
"Given a list of linker objects that should go in a segment, the type
and flags that the segment should have, and the address at which the
segment should start, compute the positions that each object should have
in the segment.
Returns three values: the address of the next byte after the segment, a
list of relocated objects, and the symbol table. The symbol table is
the same as @var{symtab}, augmented with the symbols defined in
@var{objects}, relocated to their positions in the image.
In what is something of a quirky interface, this routine also patches up
the segment table using @code{write-segment-header!}."
(let* ((alignment (fold-values (lambda (o alignment)
(lcm (elf-section-addralign
(linker-object-section o))
alignment))
objects
alignment))
(addr (align addr alignment)))
(receive (objects endaddr symtab)
(fold-values
(lambda (o out addr symtab)
(let* ((section (linker-object-section o))
(addr (align addr (elf-section-addralign section))))
(values
(cons (make-linker-object
(relocate-section-header section addr)
(linker-object-bv o)
(linker-object-relocs o)
(linker-object-symbols o))
out)
(+ addr (elf-section-size section))
(add-symbols (linker-object-symbols o) addr symtab))))
objects
'() addr symtab)
(when type
(write-segment-header!
(make-elf-segment #:index phidx #:type type
#:offset addr #:vaddr addr
#:filesz (- endaddr addr) #:memsz (- endaddr addr)
#:flags flags #:align alignment)))
(values endaddr
(reverse objects)
symtab))))
(define (process-reloc reloc bv section-offset symtab endianness)
"Process a relocation. Given that a section containing @var{reloc}
was just written into the image @var{bv} at offset @var{section-offset},
fix it up so that its reference points to the correct position of its
symbol, as present in @var{symtab}."
(match (vhash-assq (linker-reloc-symbol reloc) symtab)
(#f
(error "Undefined symbol" (linker-reloc-symbol reloc)))
((name . symbol)
;; The reloc was written at LOC bytes after SECTION-OFFSET.
(let* ((offset (+ (linker-reloc-loc reloc) section-offset))
(target (linker-symbol-address symbol)))
(case (linker-reloc-type reloc)
((rel32/4)
(let ((diff (- target offset)))
(unless (zero? (modulo diff 4))
(error "Bad offset" reloc symbol offset))
(bytevector-s32-set! bv offset
(+ (/ diff 4) (linker-reloc-addend reloc))
endianness)))
((abs32/1)
(bytevector-u32-set! bv offset target endianness))
((abs64/1)
(bytevector-u64-set! bv offset target endianness))
(else
(error "bad reloc type" reloc)))))))
(define (write-linker-object bv o symtab endianness)
"Write the bytevector for the section wrapped by the linker object
@var{o} into the image @var{bv}. The section header in @var{o} should
already be relocated its final position in the image. Any relocations
in the section will be processed to point to the correct symbol
locations, as given in @var{symtab}."
(let* ((section (linker-object-section o))
(offset (elf-section-offset section))
(len (elf-section-size section))
(bytes (linker-object-bv o))
(relocs (linker-object-relocs o)))
(unless (= offset (elf-section-addr section))
(error "offset != addr" section))
(if (not (= (elf-section-type section) SHT_NOBITS))
(begin
(if (not (= len (bytevector-length bytes)))
(error "unexpected length" section bytes))
(bytevector-copy! bytes 0 bv offset len)
(for-each (lambda (reloc)
(process-reloc reloc bv offset symtab endianness))
relocs)))))
(define (find-shstrndx objects)
"Find the section name string table in @var{objects}, and return its
section index."
(or-map (lambda (object)
(let* ((section (linker-object-section object))
(bv (linker-object-bv object))
(name (elf-section-name section)))
(and (= (elf-section-type section) SHT_STRTAB)
(equal? (false-if-exception (string-table-ref bv name))
".shstrtab")
(elf-section-index section))))
objects))
(define (add-elf-objects objects endianness word-size)
"Given the list of linker objects supplied by the user, add linker
objects corresponding to parts of the ELF file: the null object, the ELF
header, and the section table.
Both of these internal objects include relocs, allowing their
inter-object references to be patched up when the final image allocation
is known. There is special support for patching up the segment table,
however. Because the segment table needs to know the segment sizes,
which is the difference between two symbols in image space, and there is
no reloc kind that is the difference between two symbols, we make a hack
and return a closure that patches up segment table entries. It seems to
work.
Returns two values: the procedure to patch the segment table, and the
list of objects, augmented with objects for the special ELF sections."
(define phoff (elf-header-len word-size))
(define phentsize (elf-program-header-len word-size))
(define shentsize (elf-section-header-len word-size))
(define shnum (+ (length objects) 3))
(define reloc-kind
(case word-size
((4) 'abs32/1)
((8) 'abs64/1)
(else (error "bad word size" word-size))))
;; ELF requires that the first entry in the section table be of type
;; SHT_NULL.
;;
(define (make-null-section)
(make-linker-object (make-elf-section #:index 0 #:type SHT_NULL
#:flags 0 #:addralign 0)
#vu8() '() '()))
;; The ELF header and the segment table.
;;
(define (make-header phnum index shoff-label)
(let* ((header (make-elf #:byte-order endianness #:word-size word-size
#:phoff phoff #:phnum phnum #:phentsize phentsize
#:shoff 0 #:shnum shnum #:shentsize shentsize
#:shstrndx (or (find-shstrndx objects) SHN_UNDEF)))
(shoff-reloc (make-linker-reloc reloc-kind
(elf-header-shoff-offset word-size)
0
shoff-label))
(size (+ phoff (* phnum phentsize)))
(bv (make-bytevector size 0)))
(write-elf-header bv header)
;; Leave the segment table uninitialized; it will be filled in
;; later by calls to the write-segment-header! closure.
(make-linker-object (make-elf-section #:index index #:type SHT_PROGBITS
#:flags SHF_ALLOC #:size size)
bv
(list shoff-reloc)
'())))
;; The section table.
;;
(define (make-footer objects shoff-label)
(let* ((size (* shentsize shnum))
(bv (make-bytevector size 0))
(section-table (make-elf-section #:index (length objects)
#:type SHT_PROGBITS
#:flags 0
#:size size)))
(define (write-and-reloc section-label section relocs)
(let ((offset (* shentsize (elf-section-index section))))
(write-elf-section-header bv offset endianness word-size section)
(if (= (elf-section-type section) SHT_NULL)
relocs
(cons* (make-linker-reloc
reloc-kind
(+ offset (elf-section-header-addr-offset word-size))
0
section-label)
(make-linker-reloc
reloc-kind
(+ offset (elf-section-header-offset-offset word-size))
0
section-label)
relocs))))
(let ((relocs (fold-values
(lambda (object relocs)
(write-and-reloc
(linker-symbol-name
(linker-object-section-symbol object))
(linker-object-section object)
relocs))
objects
(write-and-reloc shoff-label section-table '()))))
(%make-linker-object section-table bv relocs
(list (make-linker-symbol shoff-label 0))))))
(let* ((null-section (make-null-section))
(objects (cons null-section objects))
(shoff (gensym "*section-table*"))
(header (make-header (count-segments objects) (length objects) shoff))
(objects (cons header objects))
(footer (make-footer objects shoff))
(objects (cons footer objects)))
;; The header includes the segment table, which needs offsets and
;; sizes of the segments. Normally we would use relocs to rewrite
;; these values, but there is no reloc type that would allow us to
;; compute size. Such a reloc would need to take the difference
;; between two symbols, and it's probably a bad idea architecturally
;; to create one.
;;
;; So instead we return a closure to patch up the segment table.
;; Normally we'd shy away from such destructive interfaces, but it's
;; OK as we create the header section ourselves.
;;
(define (write-segment-header! segment)
(let ((bv (linker-object-bv header))
(offset (+ phoff (* (elf-segment-index segment) phentsize))))
(write-elf-program-header bv offset endianness word-size segment)))
(values write-segment-header! objects)))
(define (allocate-elf objects page-aligned? endianness word-size)
"Lay out @var{objects} into an ELF image, computing the size of the
file, the positions of the objects, and the global symbol table.
If @var{page-aligned?} is true, read-only and writable data are
separated so that only those writable parts of the image need be mapped
with writable permissions. This makes the resulting image larger. It
is more suitable to situations where you would write a file out to disk
and read it in with mmap. Otherwise if @var{page-aligned?} is false,
sections default to 8-byte alignment.
Returns three values: the total image size, a list of objects with
relocated headers, and the global symbol table."
(receive (write-segment-header! objects)
(add-elf-objects objects endianness word-size)
(let lp ((seglists (collate-objects-into-segments objects))
(objects '())
(phidx 0)
(addr 0)
(symtab vlist-null)
(prev-flags 0))
(match seglists
((((type . flags) objs-in ...) seglists ...)
(receive (addr objs-out symtab)
(allocate-segment
write-segment-header!
phidx type flags objs-in addr symtab
(if (and page-aligned?
(not (= flags prev-flags))
;; Allow sections that are not in
;; loadable segments to share pages
;; with PF_R segments.
(not (and (not type) (= PF_R prev-flags))))
*page-size*
8))
(lp seglists
(fold-values cons objs-out objects)
(if type (1+ phidx) phidx)
addr
symtab
flags)))
(()
(values addr
(reverse objects)
symtab))))))
(define (check-section-numbers objects)
"Verify that taken as a whole, that all objects have distinct,
contiguous section numbers, starting from 1. (Section 0 is the null
section.)"
(let* ((nsections (1+ (length objects))) ; 1+ for initial NULL section.
(sections (make-vector nsections #f)))
(for-each (lambda (object)
(let ((n (elf-section-index (linker-object-section object))))
(cond
((< n 1)
(error "Invalid section number" object))
((>= n nsections)
(error "Invalid section number" object))
((vector-ref sections n)
(error "Duplicate section" (vector-ref sections n) object))
(else
(vector-set! sections n object)))))
objects)))
;; Given a list of linker objects, collate the objects into segments,
;; allocate the segments, allocate the ELF bytevector, and write the
;; segments into the bytevector, relocating as we go.
;;
(define* (link-elf objects #:key
(page-aligned? #t)
(endianness (target-endianness))
(word-size (target-word-size)))
"Create an ELF image from the linker objects, @var{objects}.
If @var{page-aligned?} is true, read-only and writable data are
separated so that only those writable parts of the image need be mapped
with writable permissions. This is suitable for situations where you
would write a file out to disk and read it in with @code{mmap}.
Otherwise if @var{page-aligned?} is false, sections default to 8-byte
alignment.
Returns a bytevector."
(check-section-numbers objects)
(receive (size objects symtab)
(allocate-elf objects page-aligned? endianness word-size)
(let ((bv (make-bytevector size 0)))
(for-each
(lambda (object)
(write-linker-object bv object symtab endianness))
objects)
bv)))
[-- Attachment #3: Type: text/plain, Size: 26 bytes --]
--
http://wingolog.org/
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 4/6] refactor linker to lay out ELF files and memory in the same way
2013-05-23 10:46 ` Andy Wingo
@ 2013-05-23 11:59 ` Ludovic Courtès
0 siblings, 0 replies; 24+ messages in thread
From: Ludovic Courtès @ 2013-05-23 11:59 UTC (permalink / raw)
To: Andy Wingo; +Cc: guile-devel
Andy Wingo <wingo@pobox.com> skribis:
> On Wed 22 May 2013 22:56, ludo@gnu.org (Ludovic Courtès) writes:
>
>> Please move the descriptions as docstrings.
>
> Done. I've added docstrings for this file in the patch that I'll push,
> and taken the opportunity to simplify some other things. Attaching a
> copy of the linker; it's easier to read as a file than as a patch.
Very nice, thank you!
I think it’ll make it easier for people to page it in. ;-)
Ludo’.
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH 5/6] refactor and simplify ELF loader in objcodes.c
2013-05-18 15:05 wip-linker: Refactor ELF linker and loader Andy Wingo
` (3 preceding siblings ...)
2013-05-18 15:05 ` [PATCH 4/6] refactor linker to lay out ELF files and memory in the same way Andy Wingo
@ 2013-05-18 15:05 ` Andy Wingo
2013-05-22 21:06 ` Ludovic Courtès
2013-05-18 15:05 ` [PATCH 6/6] add (find-mapped-elf-image) procedure to (system vm objcode) module Andy Wingo
2013-05-22 21:13 ` wip-linker: Refactor ELF linker and loader Ludovic Courtès
6 siblings, 1 reply; 24+ messages in thread
From: Andy Wingo @ 2013-05-18 15:05 UTC (permalink / raw)
To: guile-devel; +Cc: Andy Wingo
* libguile/objcodes.c (sniff_elf_alignment, alloc_aligned)
(copy_and_align_elf_data): New helpers for portably re-aligning ELF
data from read(2) or from a bytevector.
(load_thunk_from_memory): Simplify! Now there is only one procedure
that loads ELF, and it does less: it simply receives the whole image
in one array, hopefully from mmap.
(scm_load_thunk_from_file): Use new map_file_contents helper, and go
through load_thunk_from_memory.
(scm_load_thunk_from_memory): Pass load_thunk_from_memory a piece of
memory that it owns, and that is appropriately aligned.
---
libguile/objcodes.c | 435 +++++++++++++++++++++++----------------------------
1 file changed, 195 insertions(+), 240 deletions(-)
diff --git a/libguile/objcodes.c b/libguile/objcodes.c
index 3f51c70..c06265e 100644
--- a/libguile/objcodes.c
+++ b/libguile/objcodes.c
@@ -156,48 +156,104 @@ check_elf_header (const Elf_Ehdr *header)
return NULL;
}
-static int
-segment_flags_to_prot (Elf_Word flags)
+#define IS_ALIGNED(offset, alignment) \
+ (!((offset) & ((alignment) - 1)))
+#define ALIGN(offset, alignment) \
+ ((offset + (alignment - 1)) & ~(alignment - 1))
+
+static unsigned
+sniff_elf_alignment (const char *data, size_t len)
{
- int prot = 0;
-
- if (flags & PF_X)
- prot |= PROT_EXEC;
- if (flags & PF_W)
- prot |= PROT_WRITE;
- if (flags & PF_R)
- prot |= PROT_READ;
+ Elf_Ehdr *header;
+ int i;
+ unsigned alignment = 8;
+
+ if (len < sizeof(Elf_Ehdr))
+ return alignment;
+ header = (Elf_Ehdr *) data;
+ if (header->e_phoff + header->e_phnum * header->e_phentsize >= len)
+ return alignment;
+ for (i = 0; i < header->e_phnum; i++)
+ {
+ Elf_Phdr *phdr;
+ const char *phdr_addr = data + header->e_phoff + i * header->e_phentsize;
- return prot;
+ if (!IS_ALIGNED ((scm_t_uintptr) phdr_addr, alignof_type (Elf_Phdr)))
+ return alignment;
+ phdr = (Elf_Phdr *) phdr_addr;
+
+ if (phdr->p_align & (phdr->p_align - 1))
+ return alignment;
+
+ if (phdr->p_align > alignment)
+ alignment = phdr->p_align;
+ }
+
+ return alignment;
}
-static int
-map_segments (int fd, char **base,
- const Elf_Phdr *from, const Elf_Phdr *to)
+/* This function leaks the memory that it allocates. */
+static char*
+alloc_aligned (size_t len, unsigned alignment)
{
- int prot = segment_flags_to_prot (from->p_flags);
char *ret;
- ret = mmap (*base + from->p_vaddr,
- to->p_offset + to->p_filesz - from->p_offset,
- prot, MAP_PRIVATE, fd, from->p_offset);
+ if (alignment == 8)
+ {
+ /* FIXME: Assert that we actually have an 8-byte-aligned malloc. */
+ ret = malloc (len);
+ }
+#if defined(HAVE_SYS_MMAN_H) && defined(MMAP_ANONYMOUS)
+ else if (alignment == SCM_PAGE_SIZE)
+ {
+ ret = mmap (NULL, len, PROT_READ | PROT_WRITE, -1, 0);
+ if (ret == MAP_FAILED)
+ SCM_SYSERROR;
+ }
+#endif
+ else
+ {
+ if (len + alignment < len)
+ abort ();
+
+ ret = malloc (len + alignment - 1);
+ if (!ret)
+ abort ();
+ ret = (char *) ALIGN ((scm_t_uintptr) ret, alignment);
+ }
- if (ret == (char *) -1)
- return 1;
+ return ret;
+}
+
+static char*
+copy_and_align_elf_data (const char *data, size_t len)
+{
+ unsigned alignment;
+ char *copy;
- if (!*base)
- *base = ret;
+ alignment = sniff_elf_alignment (data, len);
+ copy = alloc_aligned (len, alignment);
+ memcpy(copy, data, len);
- return 0;
+ return copy;
}
+#ifdef HAVE_SYS_MMAN_H
static int
-mprotect_segments (char *base, const Elf_Phdr *from, const Elf_Phdr *to)
+segment_flags_to_prot (Elf_Word flags)
{
- return mprotect (base + from->p_vaddr,
- to->p_vaddr + to->p_memsz - from->p_vaddr,
- segment_flags_to_prot (from->p_flags));
+ int prot = 0;
+
+ if (flags & PF_X)
+ prot |= PROT_EXEC;
+ if (flags & PF_W)
+ prot |= PROT_WRITE;
+ if (flags & PF_R)
+ prot |= PROT_READ;
+
+ return prot;
}
+#endif
static char*
process_dynamic_segment (char *base, Elf_Phdr *dyn_phdr,
@@ -272,152 +328,38 @@ process_dynamic_segment (char *base, Elf_Phdr *dyn_phdr,
#define ABORT(msg) do { err_msg = msg; goto cleanup; } while (0)
-#ifdef HAVE_SYS_MMAN_H
static SCM
-load_thunk_from_fd_using_mmap (int fd)
-#define FUNC_NAME "load-thunk-from-disk"
+load_thunk_from_memory (char *data, size_t len, int is_read_only)
+#define FUNC_NAME "load-thunk-from-memory"
{
- Elf_Ehdr header;
+ Elf_Ehdr *header;
Elf_Phdr *ph;
const char *err_msg = 0;
- char *base = 0;
- size_t n;
+ size_t n, alignment = 8;
int i;
- int start_segment = -1;
- int prev_segment = -1;
int dynamic_segment = -1;
SCM init = SCM_BOOL_F, entry = SCM_BOOL_F;
- if (full_read (fd, &header, sizeof header) != sizeof header)
+ if (len < sizeof *header)
ABORT ("object file too small");
- if ((err_msg = check_elf_header (&header)))
- goto cleanup;
-
- if (lseek (fd, header.e_phoff, SEEK_SET) == (off_t) -1)
- goto cleanup;
+ header = (Elf_Ehdr*) data;
- n = header.e_phnum;
- ph = scm_gc_malloc_pointerless (n * sizeof (Elf_Phdr), "segment headers");
-
- if (full_read (fd, ph, n * sizeof (Elf_Phdr)) != n * sizeof (Elf_Phdr))
- ABORT ("failed to read program headers");
-
- for (i = 0; i < n; i++)
- {
- if (!ph[i].p_memsz)
- continue;
-
- if (ph[i].p_filesz != ph[i].p_memsz)
- ABORT ("expected p_filesz == p_memsz");
-
- if (!ph[i].p_flags)
- ABORT ("expected nonzero segment flags");
-
- if (ph[i].p_type == PT_DYNAMIC)
- {
- if (dynamic_segment >= 0)
- ABORT ("expected only one PT_DYNAMIC segment");
- dynamic_segment = i;
- }
-
- if (start_segment < 0)
- {
- if (!base && ph[i].p_vaddr)
- ABORT ("first loadable vaddr is not 0");
-
- start_segment = prev_segment = i;
- continue;
- }
-
- if (ph[i].p_flags == ph[start_segment].p_flags)
- {
- if (ph[i].p_vaddr - ph[prev_segment].p_vaddr
- != ph[i].p_offset - ph[prev_segment].p_offset)
- ABORT ("coalesced segments not contiguous");
-
- prev_segment = i;
- continue;
- }
-
- /* Otherwise we have a new kind of segment. Map previous
- segments. */
- if (map_segments (fd, &base, &ph[start_segment], &ph[prev_segment]))
- goto cleanup;
-
- /* Open a new set of segments. */
- start_segment = prev_segment = i;
- }
-
- /* Map last segments. */
- if (start_segment < 0)
- ABORT ("no loadable segments");
-
- if (map_segments (fd, &base, &ph[start_segment], &ph[prev_segment]))
+ if ((err_msg = check_elf_header (header)))
goto cleanup;
- if (dynamic_segment < 0)
- ABORT ("no PT_DYNAMIC segment");
-
- if ((err_msg = process_dynamic_segment (base, &ph[dynamic_segment],
- &init, &entry)))
- goto cleanup;
-
- if (scm_is_true (init))
- scm_call_0 (init);
-
- /* Finally! Return the thunk. */
- return entry;
-
- /* FIXME: munmap on error? */
- cleanup:
- {
- int errno_save = errno;
- (void) close (fd);
- errno = errno_save;
- if (errno)
- SCM_SYSERROR;
- scm_misc_error (FUNC_NAME, err_msg ? err_msg : "error loading ELF file",
- SCM_EOL);
- }
-}
-#undef FUNC_NAME
-#endif /* HAVE_SYS_MMAN_H */
-
-static SCM
-load_thunk_from_memory (char *data, size_t len)
-#define FUNC_NAME "load-thunk-from-memory"
-{
- Elf_Ehdr header;
- Elf_Phdr *ph;
- const char *err_msg = 0;
- char *base = 0;
- size_t n, memsz = 0, alignment = 8;
- int i;
- int first_loadable = -1;
- int start_segment = -1;
- int prev_segment = -1;
- int dynamic_segment = -1;
- SCM init = SCM_BOOL_F, entry = SCM_BOOL_F;
+ if (header->e_phnum == 0)
+ ABORT ("no loadable segments");
+ n = header->e_phnum;
- if (len < sizeof header)
+ if (len < header->e_phoff + n * sizeof (Elf_Phdr))
ABORT ("object file too small");
- memcpy (&header, data, sizeof header);
-
- if ((err_msg = check_elf_header (&header)))
- goto cleanup;
-
- n = header.e_phnum;
- if (len < header.e_phoff + n * sizeof (Elf_Phdr))
- goto cleanup;
- ph = (Elf_Phdr*) (data + header.e_phoff);
+ ph = (Elf_Phdr*) (data + header->e_phoff);
+ /* Check that the segment table is sane. */
for (i = 0; i < n; i++)
{
- if (!ph[i].p_memsz)
- continue;
-
if (ph[i].p_filesz != ph[i].p_memsz)
ABORT ("expected p_filesz == p_memsz");
@@ -438,84 +380,49 @@ load_thunk_from_memory (char *data, size_t len)
dynamic_segment = i;
}
- if (first_loadable < 0)
+ if (i == 0)
{
- if (ph[i].p_vaddr)
+ if (ph[i].p_vaddr != 0)
ABORT ("first loadable vaddr is not 0");
-
- first_loadable = i;
}
+ else
+ {
+ if (ph[i].p_vaddr < ph[i-1].p_vaddr + ph[i-1].p_memsz)
+ ABORT ("overlapping segments");
- if (ph[i].p_vaddr < memsz)
- ABORT ("overlapping segments");
-
- if (ph[i].p_offset + ph[i].p_filesz > len)
- ABORT ("segment beyond end of byte array");
-
- memsz = ph[i].p_vaddr + ph[i].p_memsz;
+ if (ph[i].p_offset + ph[i].p_filesz > len)
+ ABORT ("segment beyond end of byte array");
+ }
}
- if (first_loadable < 0)
- ABORT ("no loadable segments");
-
if (dynamic_segment < 0)
ABORT ("no PT_DYNAMIC segment");
- /* Now copy segments. */
-
- /* We leak this memory, as we leak the memory mappings in
- load_thunk_from_fd_using_mmap.
+ if (!IS_ALIGNED ((scm_t_uintptr) data, alignment))
+ ABORT ("incorrectly aligned base");
- If the file is has an alignment of 8, use the standard malloc.
- (FIXME to ensure alignment on non-GNU malloc.) Otherwise use
- posix_memalign. We only use mprotect if the aligment is 4096. */
- if (alignment == 8)
+ /* Allow writes to writable pages. */
+ if (is_read_only)
{
- base = malloc (memsz);
- if (!base)
- goto cleanup;
- }
- else
- if ((errno = posix_memalign ((void **) &base, alignment, memsz)))
- goto cleanup;
-
- memset (base, 0, memsz);
-
- for (i = 0; i < n; i++)
- {
- if (!ph[i].p_memsz)
- continue;
-
- memcpy (base + ph[i].p_vaddr,
- data + ph[i].p_offset,
- ph[i].p_memsz);
-
- if (start_segment < 0)
- {
- start_segment = prev_segment = i;
- continue;
- }
-
- if (ph[i].p_flags == ph[start_segment].p_flags)
+#ifdef HAVE_SYS_MMAN_H
+ for (i = 0; i < n; i++)
{
- prev_segment = i;
- continue;
+ if (ph[i].p_flags == PF_R)
+ continue;
+ if (ph[i].p_align != 4096)
+ continue;
+
+ if (mprotect (data + ph[i].p_vaddr,
+ ph[i].p_memsz,
+ segment_flags_to_prot (ph[i].p_flags)))
+ goto cleanup;
}
-
- if (alignment == 4096)
- if (mprotect_segments (base, &ph[start_segment], &ph[prev_segment]))
- goto cleanup;
-
- /* Open a new set of segments. */
- start_segment = prev_segment = i;
+#else
+ ABORT ("expected writable pages");
+#endif
}
- /* Mprotect the last segments. */
- if (alignment == 4096)
- if (mprotect_segments (base, &ph[start_segment], &ph[prev_segment]))
- goto cleanup;
-
- if ((err_msg = process_dynamic_segment (base, &ph[dynamic_segment],
+ if ((err_msg = process_dynamic_segment (data, &ph[dynamic_segment],
&init, &entry)))
goto cleanup;
@@ -535,22 +442,40 @@ load_thunk_from_memory (char *data, size_t len)
}
#undef FUNC_NAME
-#ifndef HAVE_SYS_MMAN_H
-static SCM
-load_thunk_from_fd_using_read (int fd)
-#define FUNC_NAME "load-thunk-from-disk"
+#define SCM_PAGE_SIZE 4096
+
+static char*
+map_file_contents (int fd, size_t len, int *is_read_only)
+#define FUNC_NAME "load-thunk-from-file"
{
char *data;
- size_t len;
- struct stat st;
- int ret;
- ret = fstat (fd, &st);
- if (ret < 0)
+#ifdef HAVE_SYS_MMAN_H
+ data = mmap (NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (data == MAP_FAILED)
SCM_SYSERROR;
- len = st.st_size;
- data = scm_gc_malloc_pointerless (len, "objcode");
- if (full_read (fd, data, len) != len)
+ *is_read_only = 1;
+#else
+ if (lseek (fd, 0, SEEK_START) < 0)
+ {
+ int errno_save = errno;
+ (void) close (fd);
+ errno = errno_save;
+ SCM_SYSERROR;
+ }
+
+ /* Given that we are using the read fallback, optimistically assume
+ that the .go files were made with 8-byte alignment.
+ alignment. */
+ data = malloc (end);
+ if (!data)
+ {
+ (void) close (fd);
+ scm_misc_error (FUNC_NAME, "failed to allocate ~A bytes",
+ scm_list_1 (scm_from_size_t (end)));
+ }
+
+ if (full_read (fd, data, end) != end)
{
int errno_save = errno;
(void) close (fd);
@@ -560,11 +485,25 @@ load_thunk_from_fd_using_read (int fd)
scm_misc_error (FUNC_NAME, "short read while loading objcode",
SCM_EOL);
}
- (void) close (fd);
- return load_thunk_from_memory (data, len);
+
+ /* If our optimism failed, fall back. */
+ {
+ unsigned alignment = sniff_elf_alignment (data, end);
+
+ if (alignment != 8)
+ {
+ char *copy = copy_and_align_elf_data (data, end, alignment);
+ free (data);
+ data = copy;
+ }
+ }
+
+ *is_read_only = 0;
+#endif
+
+ return data;
}
#undef FUNC_NAME
-#endif /* ! HAVE_SYS_MMAN_H */
SCM_DEFINE (scm_load_thunk_from_file, "load-thunk-from-file", 1, 0, 0,
(SCM filename),
@@ -572,7 +511,9 @@ SCM_DEFINE (scm_load_thunk_from_file, "load-thunk-from-file", 1, 0, 0,
#define FUNC_NAME s_scm_load_thunk_from_file
{
char *c_filename;
- int fd;
+ int fd, is_read_only;
+ off_t end;
+ char *data;
SCM_VALIDATE_STRING (1, filename);
@@ -581,11 +522,15 @@ SCM_DEFINE (scm_load_thunk_from_file, "load-thunk-from-file", 1, 0, 0,
free (c_filename);
if (fd < 0) SCM_SYSERROR;
-#ifdef HAVE_SYS_MMAN_H
- return load_thunk_from_fd_using_mmap (fd);
-#else
- return load_thunk_from_fd_using_read (fd);
-#endif
+ end = lseek (fd, 0, SEEK_END);
+ if (end < 0)
+ SCM_SYSERROR;
+
+ data = map_file_contents (fd, end, &is_read_only);
+
+ (void) close (fd);
+
+ return load_thunk_from_memory (data, end, is_read_only);
}
#undef FUNC_NAME
@@ -594,10 +539,20 @@ SCM_DEFINE (scm_load_thunk_from_memory, "load-thunk-from-memory", 1, 0, 0,
"")
#define FUNC_NAME s_scm_load_thunk_from_memory
{
+ char *data;
+ size_t len;
+
SCM_VALIDATE_BYTEVECTOR (1, bv);
- return load_thunk_from_memory ((char *) SCM_BYTEVECTOR_CONTENTS (bv),
- SCM_BYTEVECTOR_LENGTH (bv));
+ data = (char *) SCM_BYTEVECTOR_CONTENTS (bv);
+ len = SCM_BYTEVECTOR_LENGTH (bv);
+
+ /* Copy data in order to align it, to trace its GC roots and
+ writable sections, and to keep it in memory. */
+
+ data = copy_and_align_elf_data (data, len);
+
+ return load_thunk_from_memory (data, len, 0);
}
#undef FUNC_NAME
--
1.7.10.4
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH 5/6] refactor and simplify ELF loader in objcodes.c
2013-05-18 15:05 ` [PATCH 5/6] refactor and simplify ELF loader in objcodes.c Andy Wingo
@ 2013-05-22 21:06 ` Ludovic Courtès
2013-05-23 10:58 ` Andy Wingo
0 siblings, 1 reply; 24+ messages in thread
From: Ludovic Courtès @ 2013-05-22 21:06 UTC (permalink / raw)
To: guile-devel
Andy Wingo <wingo@pobox.com> skribis:
> * libguile/objcodes.c (sniff_elf_alignment, alloc_aligned)
> (copy_and_align_elf_data): New helpers for portably re-aligning ELF
> data from read(2) or from a bytevector.
> (load_thunk_from_memory): Simplify! Now there is only one procedure
> that loads ELF, and it does less: it simply receives the whole image
> in one array, hopefully from mmap.
>
> (scm_load_thunk_from_file): Use new map_file_contents helper, and go
> through load_thunk_from_memory.
> (scm_load_thunk_from_memory): Pass load_thunk_from_memory a piece of
> memory that it owns, and that is appropriately aligned.
> ---
> libguile/objcodes.c | 435 +++++++++++++++++++++++----------------------------
> 1 file changed, 195 insertions(+), 240 deletions(-)
Lines removed, cool! ;-)
> +static unsigned
> +sniff_elf_alignment (const char *data, size_t len)
What about:
/* Return the alignment required by the ELF at DATA,
of LEN bytes. */
static size_t
elf_alignment (const char *elf, size_t len)
> +/* This function leaks the memory that it allocates. */
> +static char*
> +alloc_aligned (size_t len, unsigned alignment)
What about using posix_memalign or similar?
Alternatively, using scm_gc_malloc_pointerless, which is known to return
8-byte-aligned boundary?
Or using ALIGN (malloc (size + 8), 8)?
> + /* Given that we are using the read fallback, optimistically assume
> + that the .go files were made with 8-byte alignment.
> + alignment. */
> + data = malloc (end);
> + if (!data)
scm_malloc would save you from the if (!data).
Would it be OK to use scm_gc_malloc_pointerless here?
> + /* If our optimism failed, fall back. */
> + {
> + unsigned alignment = sniff_elf_alignment (data, end);
> +
> + if (alignment != 8)
Since .go are only produced by Guile, can it really happen to have .go
files without 8-byte alignment?
Ludo’.
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 5/6] refactor and simplify ELF loader in objcodes.c
2013-05-22 21:06 ` Ludovic Courtès
@ 2013-05-23 10:58 ` Andy Wingo
2013-05-23 20:35 ` Ludovic Courtès
0 siblings, 1 reply; 24+ messages in thread
From: Andy Wingo @ 2013-05-23 10:58 UTC (permalink / raw)
To: Ludovic Courtès; +Cc: guile-devel
On Wed 22 May 2013 23:06, ludo@gnu.org (Ludovic Courtès) writes:
>> +/* This function leaks the memory that it allocates. */
>> +static char*
>> +alloc_aligned (size_t len, unsigned alignment)
>
> What about using posix_memalign or similar?
Not present on many systems, and gnulib can't make a useful wrapper. If
you do the normal trick of allocating more with malloc then aligning it,
you make it impossible to free().
> Alternatively, using scm_gc_malloc_pointerless, which is known to return
> 8-byte-aligned boundary?
For two reasons. One is that this memory is not collectable. Static
allocation of constants means that you can get pointers into the middle
of the image from the heap. I wanted to avoid pressuring the GC.
The other reason is that you can get pointers out of the image *to* the
heap -- for data that is statically allocated, but fixed up at runtime.
For example, keywords. Their stringbuf is allocated statically in
read-only memory, but there is also a cell allocated for the keyword
itself, and that lives on the heap.
The best is to use mmap. But if you fall back, malloc is OK, and we can
manually align it. In any case the loader ends up adding part of the
image to the GC roots, so it's probably best that the memory not be
managed by the GC itself.
>> + /* If our optimism failed, fall back. */
>> + {
>> + unsigned alignment = sniff_elf_alignment (data, end);
>> +
>> + if (alignment != 8)
>
> Since .go are only produced by Guile, can it really happen to have .go
> files without 8-byte alignment?
Yes! If the system supports mmap, .go files should be produced with
4096-byte alignment so that the whole thing can be mapped read-only, and
write permissions only need be given to the writable pages. (Mprotect
only works on a page level.)
Cheers,
Andy
--
http://wingolog.org/
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 5/6] refactor and simplify ELF loader in objcodes.c
2013-05-23 10:58 ` Andy Wingo
@ 2013-05-23 20:35 ` Ludovic Courtès
2013-05-24 14:49 ` Andy Wingo
0 siblings, 1 reply; 24+ messages in thread
From: Ludovic Courtès @ 2013-05-23 20:35 UTC (permalink / raw)
To: Andy Wingo; +Cc: guile-devel
Andy Wingo <wingo@pobox.com> skribis:
> On Wed 22 May 2013 23:06, ludo@gnu.org (Ludovic Courtès) writes:
>
>>> +/* This function leaks the memory that it allocates. */
>>> +static char*
>>> +alloc_aligned (size_t len, unsigned alignment)
>>
>> What about using posix_memalign or similar?
>
> Not present on many systems, and gnulib can't make a useful wrapper. If
> you do the normal trick of allocating more with malloc then aligning it,
> you make it impossible to free().
OK.
>> Alternatively, using scm_gc_malloc_pointerless, which is known to return
>> 8-byte-aligned boundary?
>
> For two reasons. One is that this memory is not collectable. Static
> allocation of constants means that you can get pointers into the middle
> of the image from the heap. I wanted to avoid pressuring the GC.
>
> The other reason is that you can get pointers out of the image *to* the
> heap -- for data that is statically allocated, but fixed up at runtime.
> For example, keywords. Their stringbuf is allocated statically in
> read-only memory, but there is also a cell allocated for the keyword
> itself, and that lives on the heap.
Right.
> The best is to use mmap. But if you fall back, malloc is OK, and we can
> manually align it. In any case the loader ends up adding part of the
> image to the GC roots, so it's probably best that the memory not be
> managed by the GC itself.
But malloc’d and mmap’d regions are not scanned by default, so here they
would need to be scanned, or the GC-managed data they refer to would
need to be registered as root, no?
(This reminds me of
<http://thread.gmane.org/gmane.comp.programming.garbage-collection.boehmgc/2576>,
except that with Guile’s loader GNU_RELRO regions could refer to
GC-managed data.)
>>> + /* If our optimism failed, fall back. */
>>> + {
>>> + unsigned alignment = sniff_elf_alignment (data, end);
>>> +
>>> + if (alignment != 8)
>>
>> Since .go are only produced by Guile, can it really happen to have .go
>> files without 8-byte alignment?
>
> Yes! If the system supports mmap, .go files should be produced with
> 4096-byte alignment so that the whole thing can be mapped read-only, and
> write permissions only need be given to the writable pages. (Mprotect
> only works on a page level.)
Oh, I’m lost. Is it 8B or 4KiB? Can’t .go files always be
page-aligned?
Ludo’.
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 5/6] refactor and simplify ELF loader in objcodes.c
2013-05-23 20:35 ` Ludovic Courtès
@ 2013-05-24 14:49 ` Andy Wingo
2013-05-24 15:37 ` Ludovic Courtès
0 siblings, 1 reply; 24+ messages in thread
From: Andy Wingo @ 2013-05-24 14:49 UTC (permalink / raw)
To: Ludovic Courtès; +Cc: guile-devel
On Thu 23 May 2013 22:35, ludo@gnu.org (Ludovic Courtès) writes:
>> The best is to use mmap. But if you fall back, malloc is OK, and we can
>> manually align it. In any case the loader ends up adding part of the
>> image to the GC roots, so it's probably best that the memory not be
>> managed by the GC itself.
>
> But malloc’d and mmap’d regions are not scanned by default, so here they
> would need to be scanned, or the GC-managed data they refer to would
> need to be registered as root, no?
>
> (This reminds me of
> <http://thread.gmane.org/gmane.comp.programming.garbage-collection.boehmgc/2576>,
> except that with Guile’s loader GNU_RELRO regions could refer to
> GC-managed data.)
See the patch :) There is a part of the dynamic section that specifies
the GC roots, which are added using GC_add_roots.
>>> Since .go are only produced by Guile, can it really happen to have .go
>>> files without 8-byte alignment?
>>
>> Yes! If the system supports mmap, .go files should be produced with
>> 4096-byte alignment so that the whole thing can be mapped read-only, and
>> write permissions only need be given to the writable pages. (Mprotect
>> only works on a page level.)
>
> Oh, I’m lost. Is it 8B or 4KiB? Can’t .go files always be
> page-aligned?
In 2.0 the alignment is 8 bytes. In 2.2, it can be any multiple of 8
bytes -- including 4096.
From linker.scm:
;;; By default, the ELF files created by the linker include some padding
;;; so that different parts of the file can be loaded in with different
;;; permissions. For example, some parts of the file are read-only and
;;; thus can be shared between processes. Some parts of the file don't
;;; need to be loaded at all. However this padding can be too much for
;;; interactive compilation, when the code is never written out to disk;
;;; in that case, pass #:page-aligned? #f to `link-elf'.
And:
If @var{page-aligned?} is true, read-only and writable data are
separated so that only those writable parts of the image need be mapped
with writable permissions. This is suitable for situations where you
would write a file out to disk and read it in with @code{mmap}.
Otherwise if @var{page-aligned?} is false, sections default to 8-byte
alignment.
Andy
--
http://wingolog.org/
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH 5/6] refactor and simplify ELF loader in objcodes.c
2013-05-24 14:49 ` Andy Wingo
@ 2013-05-24 15:37 ` Ludovic Courtès
0 siblings, 0 replies; 24+ messages in thread
From: Ludovic Courtès @ 2013-05-24 15:37 UTC (permalink / raw)
To: Andy Wingo; +Cc: guile-devel
Andy Wingo <wingo@pobox.com> skribis:
> On Thu 23 May 2013 22:35, ludo@gnu.org (Ludovic Courtès) writes:
>
>>> The best is to use mmap. But if you fall back, malloc is OK, and we can
>>> manually align it. In any case the loader ends up adding part of the
>>> image to the GC roots, so it's probably best that the memory not be
>>> managed by the GC itself.
>>
>> But malloc’d and mmap’d regions are not scanned by default, so here they
>> would need to be scanned, or the GC-managed data they refer to would
>> need to be registered as root, no?
>>
>> (This reminds me of
>> <http://thread.gmane.org/gmane.comp.programming.garbage-collection.boehmgc/2576>,
>> except that with Guile’s loader GNU_RELRO regions could refer to
>> GC-managed data.)
>
> See the patch :) There is a part of the dynamic section that specifies
> the GC roots, which are added using GC_add_roots.
Oh, great.
>>>> Since .go are only produced by Guile, can it really happen to have .go
>>>> files without 8-byte alignment?
>>>
>>> Yes! If the system supports mmap, .go files should be produced with
>>> 4096-byte alignment so that the whole thing can be mapped read-only, and
>>> write permissions only need be given to the writable pages. (Mprotect
>>> only works on a page level.)
>>
>> Oh, I’m lost. Is it 8B or 4KiB? Can’t .go files always be
>> page-aligned?
>
> In 2.0 the alignment is 8 bytes. In 2.2, it can be any multiple of 8
> bytes -- including 4096.
>
> From linker.scm:
>
> ;;; By default, the ELF files created by the linker include some padding
> ;;; so that different parts of the file can be loaded in with different
> ;;; permissions. For example, some parts of the file are read-only and
> ;;; thus can be shared between processes. Some parts of the file don't
> ;;; need to be loaded at all. However this padding can be too much for
> ;;; interactive compilation, when the code is never written out to disk;
> ;;; in that case, pass #:page-aligned? #f to `link-elf'.
>
> And:
>
> If @var{page-aligned?} is true, read-only and writable data are
> separated so that only those writable parts of the image need be mapped
> with writable permissions. This is suitable for situations where you
> would write a file out to disk and read it in with @code{mmap}.
> Otherwise if @var{page-aligned?} is false, sections default to 8-byte
> alignment.
OK, I understand now. Thanks for the pointers!
Ludo’.
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH 6/6] add (find-mapped-elf-image) procedure to (system vm objcode) module
2013-05-18 15:05 wip-linker: Refactor ELF linker and loader Andy Wingo
` (4 preceding siblings ...)
2013-05-18 15:05 ` [PATCH 5/6] refactor and simplify ELF loader in objcodes.c Andy Wingo
@ 2013-05-18 15:05 ` Andy Wingo
2013-05-22 21:09 ` Ludovic Courtès
2013-05-22 21:13 ` wip-linker: Refactor ELF linker and loader Ludovic Courtès
6 siblings, 1 reply; 24+ messages in thread
From: Andy Wingo @ 2013-05-18 15:05 UTC (permalink / raw)
To: guile-devel; +Cc: Andy Wingo
* libguile/objcodes.c (register_elf, scm_find_mapped_elf_image): New
interfaces that keep a list of all ELF mappings. Exported from the
(system vm objcode) module.
* module/system/vm/objcode.scm: Export find-mapped-elf-image.
---
libguile/objcodes.c | 108 ++++++++++++++++++++++++++++++++++++++++++
module/system/vm/objcode.scm | 5 +-
2 files changed, 111 insertions(+), 2 deletions(-)
diff --git a/libguile/objcodes.c b/libguile/objcodes.c
index c06265e..0c84475 100644
--- a/libguile/objcodes.c
+++ b/libguile/objcodes.c
@@ -89,6 +89,8 @@
#define ELFDATA ELFDATA2LSB
#endif
+static void register_elf (char *data, size_t len);
+
enum bytecode_kind
{
BYTECODE_KIND_NONE,
@@ -429,6 +431,8 @@ load_thunk_from_memory (char *data, size_t len, int is_read_only)
if (scm_is_true (init))
scm_call_0 (init);
+ register_elf (data, len);
+
/* Finally! Return the thunk. */
return entry;
@@ -607,6 +611,107 @@ scm_c_make_objcode_slice (SCM parent, const scm_t_uint8 *ptr)
}
#undef FUNC_NAME
+struct mapped_elf_image
+{
+ char *start;
+ char *end;
+};
+
+static struct mapped_elf_image *mapped_elf_images = NULL;
+static size_t mapped_elf_images_count = 0;
+static size_t mapped_elf_images_allocated = 0;
+
+static size_t
+find_mapped_elf_insertion_index (char *ptr)
+{
+ /* "mapped_elf_images_count" must never be dereferenced. */
+ size_t start = 0, end = mapped_elf_images_count;
+
+ while (start < end)
+ {
+ size_t n = start + (end - start) / 2;
+
+ if (ptr < mapped_elf_images[n].end)
+ end = n;
+ else
+ start = n + 1;
+ }
+
+ return start;
+}
+
+static void
+register_elf (char *data, size_t len)
+{
+ scm_i_pthread_mutex_lock (&scm_i_misc_mutex);
+ {
+ /* My kingdom for a generic growable sorted vector library. */
+ if (mapped_elf_images_count == mapped_elf_images_allocated)
+ {
+ struct mapped_elf_image *prev;
+ size_t n;
+
+ if (mapped_elf_images_allocated)
+ mapped_elf_images_allocated *= 2;
+ else
+ mapped_elf_images_allocated = 16;
+
+ prev = mapped_elf_images;
+ mapped_elf_images =
+ scm_gc_malloc_pointerless (sizeof (*mapped_elf_images)
+ * mapped_elf_images_allocated,
+ "mapped elf images");
+
+ for (n = 0; n < mapped_elf_images_count; n++)
+ {
+ mapped_elf_images[n].start = prev[n].start;
+ mapped_elf_images[n].end = prev[n].end;
+ }
+ }
+
+ {
+ size_t end;
+ size_t n = find_mapped_elf_insertion_index (data);
+
+ for (end = mapped_elf_images_count; n < end; end--)
+ {
+ mapped_elf_images[end].start = mapped_elf_images[end - 1].start;
+ mapped_elf_images[end].end = mapped_elf_images[end - 1].end;
+ }
+ mapped_elf_images_count++;
+
+ mapped_elf_images[n].start = data;
+ mapped_elf_images[n].end = data + len;
+ }
+ }
+ scm_i_pthread_mutex_unlock (&scm_i_misc_mutex);
+}
+
+static SCM
+scm_find_mapped_elf_image (SCM ip)
+{
+ char *ptr = (char *) scm_to_unsigned_integer (ip, 0, SCM_T_UINTPTR_MAX);
+ SCM result;
+
+ scm_i_pthread_mutex_lock (&scm_i_misc_mutex);
+ {
+ size_t n = find_mapped_elf_insertion_index ((char *) ptr);
+ if (n < mapped_elf_images_count
+ && mapped_elf_images[n].start <= ptr
+ && ptr < mapped_elf_images[n].end)
+ {
+ signed char *data = (signed char *) mapped_elf_images[n].start;
+ size_t len = mapped_elf_images[n].end - mapped_elf_images[n].start;
+ result = scm_c_take_gc_bytevector (data, len, SCM_BOOL_F);
+ }
+ else
+ result = SCM_BOOL_F;
+ }
+ scm_i_pthread_mutex_unlock (&scm_i_misc_mutex);
+
+ return result;
+}
+
\f
/*
* Scheme interface
@@ -745,6 +850,9 @@ scm_init_objcodes (void)
#include "libguile/objcodes.x"
#endif
+ scm_c_define_gsubr ("find-mapped-elf-image", 1, 0, 0,
+ (scm_t_subr) scm_find_mapped_elf_image);
+
scm_c_define ("word-size", scm_from_size_t (sizeof(SCM)));
scm_c_define ("byte-order", scm_from_uint16 (SCM_BYTE_ORDER));
}
diff --git a/module/system/vm/objcode.scm b/module/system/vm/objcode.scm
index f939a55..e2a93d7 100644
--- a/module/system/vm/objcode.scm
+++ b/module/system/vm/objcode.scm
@@ -1,6 +1,6 @@
;;; Guile VM object code
-;; Copyright (C) 2001, 2010, 2012 Free Software Foundation, Inc.
+;; Copyright (C) 2001, 2010, 2012, 2013 Free Software Foundation, Inc.
;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public
@@ -22,7 +22,8 @@
#:export (objcode? objcode-meta
bytecode->objcode objcode->bytecode
load-thunk-from-file load-thunk-from-memory
- word-size byte-order))
+ word-size byte-order
+ find-mapped-elf-image))
(load-extension (string-append "libguile-" (effective-version))
"scm_init_objcodes")
--
1.7.10.4
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: wip-linker: Refactor ELF linker and loader
2013-05-18 15:05 wip-linker: Refactor ELF linker and loader Andy Wingo
` (5 preceding siblings ...)
2013-05-18 15:05 ` [PATCH 6/6] add (find-mapped-elf-image) procedure to (system vm objcode) module Andy Wingo
@ 2013-05-22 21:13 ` Ludovic Courtès
2013-05-23 7:22 ` Andy Wingo
6 siblings, 1 reply; 24+ messages in thread
From: Ludovic Courtès @ 2013-05-22 21:13 UTC (permalink / raw)
To: guile-devel
Andy Wingo <wingo@pobox.com> skribis:
> This is the first time I've tried git-send-email, so perhaps something
> goes wrong. In any case, the intention is to allow folks to comment
> easily on the code; we'll see how that goes.
Thanks for doing this!
Again, at this stage I don’t have anything clever to say about this
code. As far as I’m concerned, I don’t want to be a hindrance, so feel
free to push uncontroversial things like this after a few days if
nothing comes out.
WDYT?
Ludo’.
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: wip-linker: Refactor ELF linker and loader
2013-05-22 21:13 ` wip-linker: Refactor ELF linker and loader Ludovic Courtès
@ 2013-05-23 7:22 ` Andy Wingo
0 siblings, 0 replies; 24+ messages in thread
From: Andy Wingo @ 2013-05-23 7:22 UTC (permalink / raw)
To: Ludovic Courtès; +Cc: guile-devel
On Wed 22 May 2013 23:13, ludo@gnu.org (Ludovic Courtès) writes:
> Andy Wingo <wingo@pobox.com> skribis:
>
>> This is the first time I've tried git-send-email, so perhaps something
>> goes wrong. In any case, the intention is to allow folks to comment
>> easily on the code; we'll see how that goes.
>
> Thanks for doing this!
And thank you for reviewing! I'll fix up the patches and push them
separately, and respond to questions inline.
> Again, at this stage I don’t have anything clever to say about this
> code. As far as I’m concerned, I don’t want to be a hindrance, so feel
> free to push uncontroversial things like this after a few days if
> nothing comes out.
>
> WDYT?
Sounds good to me. This particular patch series was very much
"down-in-the-weeds" but I wanted to try to page it into someone else's
mind, and to take advantage of the civodul style checker ;-)
Cheers,
Andy
--
http://wingolog.org/
^ permalink raw reply [flat|nested] 24+ messages in thread