1#!/usr/bin/env bash
2#
3# Run as: CLANG=bin/clang ZLIB_SRC=src/zlib \
4#             build_symbolizer.sh runtime_build/lib/clang/4.0.0/lib/linux/
5# zlib can be downloaded from http://www.zlib.net.
6#
7# Script compiles self-contained object file with symbolization code and injects
8# it into the given set of runtime libraries. Script updates only libraries
# that have unresolved __sanitizer_symbolize_* symbols and match the architecture.
# The object file is compiled from LLVM sources with dependencies like libc++ and
11# zlib. Then it internalizes symbols in the file, so that it can be linked
# into arbitrary programs, avoiding conflicts with the program's own symbols and
13# avoiding dependencies on any program symbols. The only acceptable dependencies
14# are libc and __sanitizer::internal_* from sanitizer runtime.
15#
16# Symbols exported by the object file will be used by Sanitizer runtime
17# libraries to symbolize code/data in-process.
18#
19# The script will modify the output directory which is given as the first
20# argument to the script.
21#
22# FIXME: We should really be using a simpler approach to building this object
23# file, and it should be available as a regular cmake rule. Conceptually, we
24# want to be doing "ld -r" followed by "objcopy -G" to create a relocatable
25# object file with only our entry points exposed. However, this does not work at
26# present, see PR30750.
27
# Trace every command, abort on the first failing command, and treat unset
# variables as errors. "pipefail" is intentionally not enabled: the
# unexpected-symbol check further down relies on a "diff | grep" pipeline
# reporting grep's exit status even though diff exits non-zero.
set -x
set -e
set -u

# The output directory is the only positional argument. Check for it here so
# that a missing argument produces a usage message instead of an
# "unbound variable" error from "set -u".
if [[ $# -lt 1 ]]; then
  echo "Usage: $0 <runtime library directory>"
  exit 1
fi

# Directory containing this script.
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
# Parent of the script directory; the symbolizer sources are compiled from it.
SRC_DIR=$(readlink -f "$SCRIPT_DIR/..")
# Output directory holding the runtime libraries to update. The misspelled
# name is kept because the injection loop at the end of the script uses it.
TARGE_DIR=$(readlink -f "$1")
COMPILER_RT_SRC=$(readlink -f "${SCRIPT_DIR}/../../../..")
# LLVM_SRC may be overridden from the environment; by default it is the "llvm"
# directory next to COMPILER_RT_SRC.
LLVM_SRC=${LLVM_SRC:-${COMPILER_RT_SRC}/../llvm}
LLVM_SRC=$(readlink -f "$LLVM_SRC")
38
# ZLIB_SRC must point at a zlib source tree that has an executable "configure"
# and a "zlib.h". Default it to empty first so that an unset variable reaches
# the friendly message below instead of tripping "set -u".
ZLIB_SRC=${ZLIB_SRC:-}
if [[ -z "$ZLIB_SRC" ||
      ! -x "${ZLIB_SRC}/configure" ||
      ! -f "${ZLIB_SRC}/zlib.h" ]]; then
  echo "Missing or incomplete ZLIB_SRC"
  exit 1
fi
# Canonicalize the path; the script changes directory before using it.
ZLIB_SRC=$(readlink -f "$ZLIB_SRC")
46
# Locate the toolchain. CLANG may be supplied from the environment; otherwise
# the clang found on PATH is used. Every other tool is taken from the directory
# that contains that clang binary.
CLANG="${CLANG:-$(command -v clang || true)}"
if [[ -z "$CLANG" ]]; then
  echo "Missing clang"
  exit 1
fi
CLANG_DIR=$(readlink -f "$(dirname "$CLANG")")

# All intermediate build products live under ./symbolizer, relative to the
# directory the script was started from. The script works from there onwards.
BUILD_DIR=$(readlink -f ./symbolizer)
mkdir -p "$BUILD_DIR"
cd "$BUILD_DIR"

CC=$CLANG_DIR/clang
CXX=$CLANG_DIR/clang++
TBLGEN=$CLANG_DIR/llvm-tblgen
OPT=$CLANG_DIR/opt
AR=$CLANG_DIR/llvm-ar
LINK=$CLANG_DIR/llvm-link

# Fail early, naming the first tool that is absent or not executable.
for F in "$CC" "$CXX" "$TBLGEN" "$LINK" "$OPT" "$AR"; do
  if [[ ! -x "$F" ]]; then
    echo "Missing $F"
    exit 1
  fi
done
67
# Per-component build directories, all rooted at BUILD_DIR.
ZLIB_BUILD=${BUILD_DIR}/zlib
LIBCXX_BUILD=${BUILD_DIR}/libcxx
LLVM_BUILD=${BUILD_DIR}/llvm
SYMBOLIZER_BUILD=${BUILD_DIR}/symbolizer

# Start from any caller-supplied FLAGS (empty when none were given).
FLAGS=${FLAGS:-}
# Ask the compiler which target triple the caller's flags select. FLAGS is
# deliberately left unquoted: it holds several space-separated options.
TARGET_TRIPLE=$("$CC" -print-target-triple $FLAGS)
if [[ "$FLAGS" == *-m32* ]]; then
  # Avoid new wrappers.
  FLAGS+=" -U_FILE_OFFSET_BITS"
fi
# Common compile flags shared by every component built below.
FLAGS+=" -fPIC -flto -Oz -g0 -DNDEBUG -target $TARGET_TRIPLE -Wno-unused-command-line-argument"
LINKFLAGS="-fuse-ld=lld -target $TARGET_TRIPLE"
81
# Build zlib.
# The non-hidden entries of ZLIB_SRC are copied into the build directory, and
# zlib is configured and built inside that copy as a static library.
mkdir -p "${ZLIB_BUILD}"
cd "${ZLIB_BUILD}"
cp -r "${ZLIB_SRC}"/* .
# RANLIB is pointed at /bin/true, so any ranlib invocation is a no-op.
AR="${AR}" CC="${CC}" CFLAGS="$FLAGS -Wno-deprecated-non-prototype" RANLIB=/bin/true ./configure --static
make -j libz.a
88
# Build libcxxabi and libcxx. Nothing is installed: later steps use the
# headers and archives directly from LIBCXX_BUILD.
#
# cmake is run only when the build tree has not been configured yet. The check
# looks for build.ninja rather than for the directory itself, so a configure
# step that failed part-way is retried on the next run instead of being
# skipped forever.
if [[ ! -f "${LIBCXX_BUILD}/build.ninja" ]]; then
  mkdir -p "${LIBCXX_BUILD}"
  cd "${LIBCXX_BUILD}"
  LIBCXX_FLAGS="${FLAGS} -Wno-macro-redefined"
  cmake -GNinja \
    -DLLVM_ENABLE_RUNTIMES="libcxx;libcxxabi" \
    -DCMAKE_BUILD_TYPE=Release \
    -DCMAKE_C_COMPILER="$CC" \
    -DCMAKE_CXX_COMPILER="$CXX" \
    -DCMAKE_C_FLAGS_RELEASE="${LIBCXX_FLAGS}" \
    -DCMAKE_CXX_FLAGS_RELEASE="${LIBCXX_FLAGS}" \
    -DLIBCXXABI_ENABLE_ASSERTIONS=OFF \
    -DLIBCXXABI_ENABLE_EXCEPTIONS=OFF \
    -DLIBCXX_ENABLE_ASSERTIONS=OFF \
    -DLIBCXX_ENABLE_EXCEPTIONS=OFF \
    -DLIBCXX_ENABLE_RTTI=OFF \
    -DCMAKE_SHARED_LINKER_FLAGS="$LINKFLAGS" \
    "$LLVM_SRC/../runtimes"
fi
# The build itself runs on every invocation; ninja decides what is stale.
cd "${LIBCXX_BUILD}"
ninja cxx cxxabi
111
# These flags are appended only after LIBCXX_FLAGS has been derived from FLAGS,
# so they apply to LLVM and to the symbolizer sources compiled later.
FLAGS="${FLAGS} -fno-rtti -fno-exceptions"
LLVM_CFLAGS="${FLAGS} -Wno-global-constructors"
# C++ code is compiled against the libc++ headers from LIBCXX_BUILD (the
# default C++ include paths are disabled) and the zlib headers in ZLIB_BUILD.
LLVM_CXXFLAGS="${LLVM_CFLAGS} -nostdinc++ -I${ZLIB_BUILD} -isystem ${LIBCXX_BUILD}/include -isystem ${LIBCXX_BUILD}/include/c++/v1"

# Build LLVM.
# cmake is run only when the build tree has not been configured yet. The check
# looks for build.ninja rather than for the directory itself, so a configure
# step that failed part-way is retried on the next run.
if [[ ! -f "${LLVM_BUILD}/build.ninja" ]]; then
  mkdir -p "${LLVM_BUILD}"
  cd "${LLVM_BUILD}"
  cmake -GNinja \
    -DCMAKE_BUILD_TYPE=Release \
    -DCMAKE_C_COMPILER="$CC" \
    -DCMAKE_CXX_COMPILER="$CXX" \
    -DCMAKE_C_FLAGS="${LLVM_CFLAGS}" \
    -DCMAKE_CXX_FLAGS="${LLVM_CXXFLAGS}" \
    -DCMAKE_EXE_LINKER_FLAGS="$LINKFLAGS -stdlib=libc++ -L${LIBCXX_BUILD}/lib" \
    -DLLVM_TABLEGEN="$TBLGEN" \
    -DLLVM_ENABLE_ZLIB=ON \
    -DLLVM_ENABLE_TERMINFO=OFF \
    -DLLVM_ENABLE_THREADS=OFF \
    "$LLVM_SRC"
fi
cd "${LLVM_BUILD}"
# Only the libraries that are merged into the symbolizer are requested.
ninja \
  LLVMSymbolize \
  LLVMObject \
  LLVMBinaryFormat \
  LLVMDebugInfoDWARF \
  LLVMSupport \
  LLVMDebugInfoPDB \
  LLVMDebuginfod \
  LLVMMC \
  LLVMDemangle \
  LLVMTextAPI \
  LLVMTargetParser
135
# Always start from an empty symbolizer build directory. The ":?" guard makes
# the shell abort rather than run "rm -rf" on an empty path.
cd "${BUILD_DIR}"
rm -rf "${SYMBOLIZER_BUILD:?}"
mkdir "${SYMBOLIZER_BUILD}"
cd "${SYMBOLIZER_BUILD}"

echo "Compiling..."
SYMBOLIZER_FLAGS="$LLVM_CXXFLAGS -I${LLVM_SRC}/include -I${LLVM_BUILD}/include -std=c++17"
# SYMBOLIZER_FLAGS is deliberately left unquoted: it holds several
# space-separated options. The two sources are compiled to object files in the
# current directory and then packed into symbolizer.a.
"$CXX" $SYMBOLIZER_FLAGS "${SRC_DIR}/sanitizer_symbolize.cpp" "${SRC_DIR}/sanitizer_wrappers.cpp" -c
"$AR" rc symbolizer.a sanitizer_symbolize.o sanitizer_wrappers.o
145
# Comma-separated list of the symbolizer entry points. It is handed to opt's
# internalize pass as the public API list further down. printf repeats the
# "%s," format once per name; the trailing comma is stripped afterwards.
printf -v SYMBOLIZER_API_LIST '%s,' \
  __sanitizer_symbolize_code \
  __sanitizer_symbolize_data \
  __sanitizer_symbolize_flush \
  __sanitizer_symbolize_demangle \
  __sanitizer_symbolize_set_demangle \
  __sanitizer_symbolize_set_inline_frames
SYMBOLIZER_API_LIST=${SYMBOLIZER_API_LIST%,}
152
# Find the directory that holds the freshly built libc++.a (the first match
# under LIBCXX_BUILD is used). Report a missing archive explicitly instead of
# letting dirname fail on an empty argument list.
LIBCXX_ARCHIVE=$(find "$LIBCXX_BUILD" -name libc++.a | head -n1)
if [[ -z "$LIBCXX_ARCHIVE" ]]; then
  echo "Missing libc++.a in $LIBCXX_BUILD"
  exit 1
fi
LIBCXX_ARCHIVE_DIR=$(dirname "$LIBCXX_ARCHIVE")

# Merge all the object files together and copy the resulting library back.
# Everything is linked into a single bitcode module, all.bc; archive members
# that are not bitcode are skipped (-ignore-non-bitcode).
"$LINK" "$LIBCXX_ARCHIVE_DIR/libc++.a" \
        "$LIBCXX_ARCHIVE_DIR/libc++abi.a" \
        "$LLVM_BUILD/lib/libLLVMSymbolize.a" \
        "$LLVM_BUILD/lib/libLLVMObject.a" \
        "$LLVM_BUILD/lib/libLLVMBinaryFormat.a" \
        "$LLVM_BUILD/lib/libLLVMDebugInfoDWARF.a" \
        "$LLVM_BUILD/lib/libLLVMSupport.a" \
        "$LLVM_BUILD/lib/libLLVMDebugInfoPDB.a" \
        "$LLVM_BUILD/lib/libLLVMDebugInfoMSF.a" \
        "$LLVM_BUILD/lib/libLLVMDebugInfoCodeView.a" \
        "$LLVM_BUILD/lib/libLLVMDebuginfod.a" \
        "$LLVM_BUILD/lib/libLLVMDemangle.a" \
        "$LLVM_BUILD/lib/libLLVMMC.a" \
        "$LLVM_BUILD/lib/libLLVMTextAPI.a" \
        "$LLVM_BUILD/lib/libLLVMTargetParser.a" \
        "$ZLIB_BUILD/libz.a" \
        symbolizer.a \
        -ignore-non-bitcode -o all.bc
174
echo "Optimizing..."
# Run opt's internalize pass with the symbolizer entry points as the public
# API list, then compile the resulting bitcode into a native object file.
# FLAGS contains -flto, so -fno-lto is appended after it on the command line.
# FLAGS is deliberately left unquoted: it holds several space-separated options.
"$OPT" -passes=internalize "-internalize-public-api-list=${SYMBOLIZER_API_LIST}" all.bc -o opt.bc
"$CC" $FLAGS -fno-lto -c opt.bc -o symbolizer.o

echo "Checking undefined symbols..."
# Without the reference list, diff would fail, grep would see no input, and
# the check below would pass silently, so insist that the list is readable.
if [[ ! -r "$SCRIPT_DIR/global_symbols.txt" ]]; then
  echo "Missing $SCRIPT_DIR/global_symbols.txt"
  exit 1
fi
# Record the first two fields of nm's posix-format output for each global
# symbol, sorted bytewise and de-duplicated.
nm -f posix -g symbolizer.o | cut -f 1,2 -d ' ' | LC_COLLATE=C sort -u > undefined.new
# Only lines that diff marks as added (present in undefined.new but not in the
# reference list) are failures; the "+++" header line is excluded by the
# pattern. The offending lines are printed by grep before the script aborts.
if diff -u "$SCRIPT_DIR/global_symbols.txt" undefined.new | grep -E "^\+[^+]"; then
  echo "Failed: unexpected symbols"
  exit 1
fi
183
#######################################
# Print the object file format that objdump reports for a file, i.e. the text
# following "file format " on the first such line of "objdump -f" output.
# Arguments: $1 - path to an object file or archive
# Outputs:   the format name on stdout
# Returns:   non-zero when no "file format" line is found
#######################################
arch() {
  local file=$1
  # The path is quoted so that a name containing spaces stays one argument.
  objdump -f "$file" | grep -m1 -Po "(?<=file format ).*$"
}
187
SYMBOLIZER_FORMAT=$(arch symbolizer.o)
echo "Injecting $SYMBOLIZER_FORMAT symbolizer..."
# Visit every sanitizer runtime archive in the output directory. An archive is
# updated only when its object format matches the symbolizer's and it has an
# undefined reference to __sanitizer_symbolize_code.
for A in "$TARGE_DIR"/libclang_rt.*san*.a; do
  A_FORMAT=$(arch "$A")
  if [[ "$A_FORMAT" != "$SYMBOLIZER_FORMAT" ]]; then
    continue
  fi
  # nm's diagnostics are discarded; only grep's verdict matters here.
  if ! nm -u "$A" 2>/dev/null | grep -q "__sanitizer_symbolize_code"; then
    continue
  fi
  echo "$A"
  # Add (or replace) symbolizer.o in the archive and refresh its index.
  "$AR" rcs "$A" symbolizer.o
done

echo "Success!"
201