Garbage collection is a technique of memory management where the collector tries to reclaim the memory occupied by objects that are no longer in use. This frees the programmer from of being required to keep track of the lifetimes of heap objects.
In this recipe, we will see how to integrate LLVM into a compiler for a language that supports garbage collection. LLVM does not itself provide a garbage collector, but provides a framework for describing the garbage collector's requirements to the compiler.
We will see in the following recipe how the LLVM IR code, with garbage collection intrinsic functions, is converted to the corresponding machine assembly code:
$ cat testgc.ll declare i8* @llvm_gc_allocate(i32) declare void @llvm_gc_initialize(i32) declare void @llvm.gcroot(i8**, i8*) declare void @llvm.gcwrite(i8*, i8*, i8**) define i32 @main() gc "shadow-stack" { entry: %A = alloca i8* %B = alloca i8** call void @llvm_gc_initialize(i32 1048576) ; Start with 1MB heap ;; void *A; call void @llvm.gcroot(i8** %A, i8* null) ;; A = gcalloc(10); %Aptr = call i8* @llvm_gc_allocate(i32 10) store i8* %Aptr, i8** %A ;; void **B; %tmp.1 = bitcast i8*** %B to i8** call void @llvm.gcroot(i8** %tmp.1, i8* null) ;; B = gcalloc(4); %B.upgrd.1 = call i8* @llvm_gc_allocate(i32 8) %tmp.2 = bitcast i8* %B.upgrd.1 to i8** store i8** %tmp.2, i8*** %B ;; *B = A; %B.1 = load i8**, i8*** %B %A.1 = load i8*, i8** %A call void @llvm.gcwrite(i8* %A.1, i8* %B.upgrd.1, i8** %B.1) br label %AllocLoop AllocLoop: %i = phi i32 [ 0, %entry ], [ %indvar.next, %AllocLoop ] ;; Allocated mem: allocated memory is immediately dead. call i8* @llvm_gc_allocate(i32 100) %indvar.next = add i32 %i, 1 %exitcond = icmp eq i32 %indvar.next, 10000000 br i1 %exitcond, label %Exit, label %AllocLoop Exit: ret i32 0 } declare void @__main()
llc
tool to generate the assembly code and view the assembly code using the cat
command:$ llc testgc.ll $ cat testgc.s .text .file "testgc.ll" .globl main .align 16, 0x90 .type main,@function main: # @main .Lfunc_begin0: .cfi_startproc .cfi_personality 3, __gcc_personality_v0 .cfi_lsda 3, .Lexception0 # BB#0: # %entry pushq %rbx .Ltmp9: .cfi_def_cfa_offset 16 subq $32, %rsp .Ltmp10: .cfi_def_cfa_offset 48 .Ltmp11: .cfi_offset %rbx, -16 movq llvm_gc_root_chain(%rip), %rax movq $__gc_main, 8(%rsp) movq $0, 16(%rsp) movq %rax, (%rsp) leaq (%rsp), %rax movq %rax, llvm_gc_root_chain(%rip) movq $0, 24(%rsp) .Ltmp0: movl $1048576, %edi # imm = 0x100000 callq llvm_gc_initialize .Ltmp1: # BB#1: # %entry.cont3 .Ltmp2: movl $10, %edi callq llvm_gc_allocate .Ltmp3: # BB#2: # %entry.cont2 movq %rax, 16(%rsp) .Ltmp4: movl $8, %edi callq llvm_gc_allocate .Ltmp5: # BB#3: # %entry.cont movq %rax, 24(%rsp) movq 16(%rsp), %rcx movq %rcx, (%rax) movl $10000000, %ebx # imm = 0x989680 .align 16, 0x90 .LBB0_4: # %AllocLoop # =>This Inner Loop Header: Depth=1 .Ltmp6: movl $100, %edi callq llvm_gc_allocate .Ltmp7: # BB#5: # %AllocLoop.cont # in Loop: Header=BB0_4 Depth=1 decl %ebx jne .LBB0_4 # BB#6: # %Exit movq (%rsp), %rax movq %rax, llvm_gc_root_chain(%rip) xorl %eax, %eax addq $32, %rsp popq %rbx retq .LBB0_7: # %gc_cleanup .Ltmp8: movq (%rsp), %rcx movq %rcx, llvm_gc_root_chain(%rip) movq %rax, %rdi callq _Unwind_Resume .Lfunc_end0: .size main, .Lfunc_end0-main .cfi_endproc .section .gcc_except_table,"a",@progbits .align 4 GCC_except_table0: .Lexception0: .byte 255 # @LPStart Encoding = omit .byte 3 # @TType Encoding = udata4 .asciz "234" # @TType base offset .byte 3 # Call site Encoding = udata4 .byte 26 # Call site table length .long .Ltmp0-.Lfunc_begin0 # >> Call Site 1 << .long .Ltmp7-.Ltmp0 # Call between .Ltmp0 and .Ltmp7 .long .Ltmp8-.Lfunc_begin0 # jumps to .Ltmp8 .byte 0 # On action: cleanup .long .Ltmp7-.Lfunc_begin0 # >> Call Site 2 << .long .Lfunc_end0-.Ltmp7 # Call between .Ltmp7 and .Lfunc_end0 .long 0 # has no landing pad .byte 0 # On action: cleanup .align 4 .type llvm_gc_root_chain,@object # @llvm_gc_root_chain .bss .weak llvm_gc_root_chain .align 8 llvm_gc_root_chain: .quad 0 .size llvm_gc_root_chain, 8 .type __gc_main,@object # @__gc_main .section .rodata,"a",@progbits .align 8 __gc_main: .long 2 # 0x2 .long 0 # 0x0 .size __gc_main, 8 .section ".note.GNU-stack","",@progbits
In the preceding code, in the main function, we are using the built-in GC collector strategy called shadow-stack
, which maintains a linked list of stack roots()
:
define i32 @main() gc "shadow-stack"
It mirrors the machine stack. We can provide any other technique, if we want to, by specifying its name after the function name in this format, gc "strategy name"
. This strategy name can either be the built-in strategy or our own custom strategy for garbage collection.
To identify the roots, that is, the references to the heap object, LLVM makes use of the intrinsic function @llvm.gcroot
or the .statepoint
relocation sequence. The llvm.gcroot
intrinsic function informs LLVM that a stack variable references an object on the heap and it needs to be tracked by the collector. In the preceding code, the following line is the call to the llvm.gcroot
function to mark the %tmp.1
stack variable:
call void @llvm.gcroot(i8** %tmp.1, i8* null)
The llvm.gcwrite
function is a write barrier. This means that whenever a program on which garbage collection is being done, it writes a pointer to a field of a heap object, the collector is informed about that. The llvm.gcread
intrinsic function is also present, which informs the garbage collector when the program reads a pointer to a field of a heap object. The following line of code writes the %A.1
value to the %B.upgrd
heap object:
call void @llvm.gcwrite(i8* %A.1, i8* %B.upgrd.1, i8** %B.1)
3.12.107.31