Writing the garbage collector with LLVM

Garbage collection is a technique of memory management where the collector tries to reclaim the memory occupied by objects that are no longer in use. This frees the programmer from of being required to keep track of the lifetimes of heap objects.

In this recipe, we will see how to integrate LLVM into a compiler for a language that supports garbage collection. LLVM does not itself provide a garbage collector, but provides a framework for describing the garbage collector's requirements to the compiler.

Getting ready

LLVM must be built and installed.

How to do it…

We will see in the following recipe how the LLVM IR code, with garbage collection intrinsic functions, is converted to the corresponding machine assembly code:

  1. Write the test code:
    $ cat testgc.ll
    
    declare i8* @llvm_gc_allocate(i32)
    declare void @llvm_gc_initialize(i32)
    
    declare void @llvm.gcroot(i8**, i8*)
    declare void @llvm.gcwrite(i8*, i8*, i8**)
    
    define i32 @main() gc "shadow-stack" {
    entry:
      %A = alloca i8*
      %B = alloca i8**
    
      call void @llvm_gc_initialize(i32 1048576)  ; Start with 1MB heap
    
            ;; void *A;
      call void @llvm.gcroot(i8** %A, i8* null)
    
            ;; A = gcalloc(10);
      %Aptr = call i8* @llvm_gc_allocate(i32 10)
      store i8* %Aptr, i8** %A
    
            ;; void **B;
      %tmp.1 = bitcast i8*** %B to i8**
      call void @llvm.gcroot(i8** %tmp.1, i8* null)
    
      ;; B = gcalloc(4);
      %B.upgrd.1 = call i8* @llvm_gc_allocate(i32 8)
      %tmp.2 = bitcast i8* %B.upgrd.1 to i8**
      store i8** %tmp.2, i8*** %B
    
      ;; *B = A;
      %B.1 = load i8**, i8*** %B
      %A.1 = load i8*, i8** %A
      call void @llvm.gcwrite(i8* %A.1, i8* %B.upgrd.1, i8** %B.1)
    
      br label %AllocLoop
    
    AllocLoop:
      %i = phi i32 [ 0, %entry ], [ %indvar.next, %AllocLoop ]
            ;; Allocated mem: allocated memory is immediately dead.
      call i8* @llvm_gc_allocate(i32 100)
    
      %indvar.next = add i32 %i, 1
      %exitcond = icmp eq i32 %indvar.next, 10000000
      br i1 %exitcond, label %Exit, label %AllocLoop
    
    Exit:
      ret i32 0
    }
    
    declare void @__main()
    
  2. Use the llc tool to generate the assembly code and view the assembly code using the cat command:
    $ llc testgc.ll
    
    $ cat testgc.s
      .text
      .file  "testgc.ll"
      .globl  main
      .align  16, 0x90
      .type  main,@function
    main:                                   # @main
    .Lfunc_begin0:
      .cfi_startproc
      .cfi_personality 3, __gcc_personality_v0
      .cfi_lsda 3, .Lexception0
    # BB#0:                                 # %entry
      pushq  %rbx
    .Ltmp9:
      .cfi_def_cfa_offset 16
      subq  $32, %rsp
    .Ltmp10:
      .cfi_def_cfa_offset 48
    .Ltmp11:
      .cfi_offset %rbx, -16
      movq  llvm_gc_root_chain(%rip), %rax
      movq  $__gc_main, 8(%rsp)
      movq  $0, 16(%rsp)
      movq  %rax, (%rsp)
      leaq  (%rsp), %rax
      movq  %rax, llvm_gc_root_chain(%rip)
      movq  $0, 24(%rsp)
    .Ltmp0:
      movl  $1048576, %edi          # imm = 0x100000
      callq  llvm_gc_initialize
    .Ltmp1:
    # BB#1:                                 # %entry.cont3
    .Ltmp2:
      movl  $10, %edi
      callq  llvm_gc_allocate
    .Ltmp3:
    # BB#2:                                 # %entry.cont2
      movq  %rax, 16(%rsp)
    .Ltmp4:
      movl  $8, %edi
      callq  llvm_gc_allocate
    .Ltmp5:
    # BB#3:                                 # %entry.cont
      movq  %rax, 24(%rsp)
      movq  16(%rsp), %rcx
      movq  %rcx, (%rax)
      movl  $10000000, %ebx         # imm = 0x989680
      .align  16, 0x90
    .LBB0_4:                                # %AllocLoop
                                            # =>This Inner Loop Header: Depth=1
    .Ltmp6:
      movl  $100, %edi
      callq  llvm_gc_allocate
    .Ltmp7:
    # BB#5:                                 # %AllocLoop.cont
                                            #   in Loop: Header=BB0_4 Depth=1
      decl  %ebx
      jne  .LBB0_4
    # BB#6:                                 # %Exit
      movq  (%rsp), %rax
      movq  %rax, llvm_gc_root_chain(%rip)
      xorl  %eax, %eax
      addq  $32, %rsp
      popq  %rbx
      retq
    .LBB0_7:                                # %gc_cleanup
    .Ltmp8:
      movq  (%rsp), %rcx
      movq  %rcx, llvm_gc_root_chain(%rip)
      movq  %rax, %rdi
      callq  _Unwind_Resume
    .Lfunc_end0:
      .size  main, .Lfunc_end0-main
      .cfi_endproc
      .section  .gcc_except_table,"a",@progbits
      .align  4
    GCC_except_table0:
    .Lexception0:
      .byte  255                     # @LPStart Encoding = omit
      .byte  3                       # @TType Encoding = udata4
      .asciz  "234"                  # @TType base offset
      .byte  3                       # Call site Encoding = udata4
      .byte  26                      # Call site table length
      .long  .Ltmp0-.Lfunc_begin0    # >> Call Site 1 <<
      .long  .Ltmp7-.Ltmp0           #   Call between .Ltmp0 and .Ltmp7
      .long  .Ltmp8-.Lfunc_begin0    #     jumps to .Ltmp8
      .byte  0                       #   On action: cleanup
      .long  .Ltmp7-.Lfunc_begin0    # >> Call Site 2 <<
      .long  .Lfunc_end0-.Ltmp7      #   Call between .Ltmp7 and .Lfunc_end0
      .long  0                       #     has no landing pad
      .byte  0                       #   On action: cleanup
      .align  4
    
      .type  llvm_gc_root_chain,@object # @llvm_gc_root_chain
      .bss
      .weak  llvm_gc_root_chain
      .align  8
    llvm_gc_root_chain:
      .quad  0
      .size  llvm_gc_root_chain, 8
    
      .type  __gc_main,@object       # @__gc_main
      .section  .rodata,"a",@progbits
      .align  8
    __gc_main:
      .long  2                       # 0x2
      .long  0                       # 0x0
      .size  __gc_main, 8
    
      .section  ".note.GNU-stack","",@progbits
    

How it works…

In the preceding code, in the main function, we are using the built-in GC collector strategy called shadow-stack, which maintains a linked list of stack roots():

define i32 @main() gc "shadow-stack"

It mirrors the machine stack. We can provide any other technique, if we want to, by specifying its name after the function name in this format, gc "strategy name". This strategy name can either be the built-in strategy or our own custom strategy for garbage collection.

To identify the roots, that is, the references to the heap object, LLVM makes use of the intrinsic function @llvm.gcroot or the .statepoint relocation sequence. The llvm.gcroot intrinsic function informs LLVM that a stack variable references an object on the heap and it needs to be tracked by the collector. In the preceding code, the following line is the call to the llvm.gcroot function to mark the %tmp.1 stack variable:

call void @llvm.gcroot(i8** %tmp.1, i8* null)

The llvm.gcwrite function is a write barrier. This means that whenever a program on which garbage collection is being done, it writes a pointer to a field of a heap object, the collector is informed about that. The llvm.gcread intrinsic function is also present, which informs the garbage collector when the program reads a pointer to a field of a heap object. The following line of code writes the %A.1 value to the %B.upgrd heap object:

call void @llvm.gcwrite(i8* %A.1, i8* %B.upgrd.1, i8** %B.1)

Note

Note that LLVM does not provide a garbage collector. It should be a part of the runtime library of the language. The preceding explanation deals with the infrastructure that LLVM provides for describing garbage collector requirements to the compiler.

See also

..................Content has been hidden....................

You can't read the all page of ebook, please click here login for view all page.
Reset
3.138.123.106