In Compile Linux Kernel to LLVM Bitcode we use wllvm to generate the LLVM bitcode of Linux kernel. when analyzing the kernel, we sometimes need to disable some optimization passes. However, it’s hard to do with clang. First compiling it to -O0 IR and then applying some optimization passes on it seems easier.

The optimization level of the LLVM IR is -O2 by default. If we need an -O0 LLVM IR, we need to modify the kernel compilation flag.

make KCFLAG='-O0' CC=wllvm

However, it doesn’t work because some kernel code cannot be compiled with -O0.

Fortunately, we have another way to get our -O0 LLVM IR.

When wllvm generates LLVM bitcode (.filename.o.bc) for a C file, it also generates the command to compile the file in .filename.o.cmd. We can get the command by:

path='/path/to/our/output'
awk_cmd='{ 
s = ""; 
ofile = 0;
for (i = 3; i <= NF; i++)
{
   if (ofile == 1)
   {
      s = s "'"$path"'" " ";
      ofile = 0;
   }
   else
   {
      if ($i == "-o")
         ofile = 1;
      s = s $i " ";
   }
}
print s }'
cmd_line=`head -n 1 $1 | awk "$awk_cmd"`

The output file path will be replaced by the path we provide.

This command generates native code. To get LLVM bitcode, we need to add -emit-llvm. To disable optimization, we need to add -mllvm -disable-llvm-optzns.

After generating -O0 bitcode, we can optimize it with any passes we want. Finally link all bitcodes we generate and we get a kernel bitcode with custom optimization.

Below is an example script:

#!/bin/bash


llvm_linker=llvm-link

function get_cmd
{
    path="$2"
    
    # change output file name
    awk_cmd='{ 
    s = ""; 
    ofile = 0;
    for (i = 3; i <= NF; i++)
    {
      if (ofile == 1)
      {
        s = s "'"$path"'" " ";
        ofile = 0;
      }
      else
      {
        if ($i == "-o")
           ofile = 1;
        s = s $i " ";
      }
    }
    print s }'
    cmd_line=`head -n 1 $1 | awk "$awk_cmd"`
    
    # output LLVM IR
    cmd_line="${cmd_line} -emit-llvm"
    # disable opt
    cmd_line="${cmd_line} -mllvm -disable-llvm-optzns"
    echo $cmd_line
    return
}

function compile_opt
{
    cmd_path="$1"
    basepath="$2"
    tmp_path="${basepath}.tmp.bc"
    output_path="${basepath}.opt.bc"
    cmd=`get_cmd "$cmd_path" "$tmp_path"`
    eval "$cmd" 2> /dev/null
    cmd_result=$?
    if [ ! -e "$tmp_path" ] || [ $cmd_result -ne 0 ]
    then
        exit 1
    fi
    opt -always-inline -inline -mem2reg -simplifycfg -instcombine "$tmp_path" -o "$output_path"
    cmd_result=$?
    if [ ! -e "$output_path" ] || [ $cmd_result -ne 0 ]
    then
        exit 1
    fi
}

while getopts ":l:" opt; do
    case $opt in
        l) llvm_linker=$OPTARG
           ;;
        \?) echo "Usage: -l <llvm-link>" 1>&2
            exit 1
            ;;
    esac
done
shift $((OPTIND-1))
from_file=$1
if [ -z "$from_file" ]
then
    echo "no input file" 1>&2
    exit 1
fi

extract-bc -l false -m $from_file
manifest="${from_file}.llvm.manifest"
if [ ! -r "$manifest" ]
then
    echo "no manifest file" 1>&2
    exit 1
fi

tmpmanifest="${from_file}.tmp.manifest"
echo -n "" > "$tmpmanifest"

cat "$manifest" |
    while read o_bc_name;
    do
        basename=`basename "${o_bc_name%.o.bc}"`
        # basename="${basename#.}"
        dir="${o_bc_name%/*}"
        basepath="${dir}/${basename}"
        if [ -z "$dir" ] && [ -z "$basename" ]
        then
            continue
        fi
        
        cmd_path="${basepath}.o.cmd"
        if [ ! -r "${cmd_path}" ]
        then
            echo "${cmd_path} not found"
            continue
        fi

        while [ `jobs | wc -l` -ge 8 ]
        do
            sleep 0.1
        done
        output_path="${basepath}.opt.bc"
        compile_opt "$cmd_path" "$basepath" &
        echo "$output_path"
        echo "$output_path" >> "${tmpmanifest}"
    done

while [ `jobs | wc -l` -ne 0 ]
do
    sleep 1
done

outputfile="${from_file}.opt.bc"
$llvm_linker -o "$outputfile" `cat "${tmpmanifest}"`