469 lines
16 KiB
Python
Executable file
469 lines
16 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
#===- lib/hwasan/scripts/hwasan_symbolize ----------------------------------===#
|
|
#
|
|
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
# See https:#llvm.org/LICENSE.txt for license information.
|
|
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
#
|
|
#===------------------------------------------------------------------------===#
|
|
#
|
|
# HWAddressSanitizer offline symbolization script.
|
|
#
|
|
#===------------------------------------------------------------------------===#
|
|
|
|
from __future__ import print_function
|
|
from __future__ import unicode_literals
|
|
|
|
import argparse
|
|
import glob
|
|
import html
|
|
import json
|
|
import mmap
|
|
import os
|
|
import re
|
|
import struct
|
|
import subprocess
|
|
import sys
|
|
|
|
if sys.version_info.major < 3:
|
|
# Simulate Python 3.x behaviour of defaulting to UTF-8 for print. This is
|
|
# important in case any symbols are non-ASCII.
|
|
import codecs
|
|
sys.stdout = codecs.getwriter("utf-8")(sys.stdout)
|
|
|
|
# Below, a parser for a subset of ELF. It only supports 64 bit, little-endian,
|
|
# and only parses what is necessary to find the build ids. It uses a memoryview
|
|
# into an mmap to avoid copying.
|
|
Ehdr_size = 64
|
|
e_shnum_offset = 60
|
|
e_shoff_offset = 40
|
|
|
|
Shdr_size = 64
|
|
sh_type_offset = 4
|
|
sh_offset_offset = 24
|
|
sh_size_offset = 32
|
|
SHT_NOTE = 7
|
|
|
|
Nhdr_size = 12
|
|
NT_GNU_BUILD_ID = 3
|
|
|
|
def align_up(size, alignment):
|
|
return (size + alignment - 1) & ~(alignment - 1)
|
|
|
|
def handle_Nhdr(mv, sh_size):
|
|
offset = 0
|
|
while offset < sh_size:
|
|
n_namesz, n_descsz, n_type = struct.unpack_from('<III', buffer=mv,
|
|
offset=offset)
|
|
if (n_type == NT_GNU_BUILD_ID and n_namesz == 4 and
|
|
mv[offset + Nhdr_size: offset + Nhdr_size + 4] == b"GNU\x00"):
|
|
value = mv[offset + Nhdr_size + 4: offset + Nhdr_size + 4 + n_descsz]
|
|
return value.hex()
|
|
offset += Nhdr_size + align_up(n_namesz, 4) + align_up(n_descsz, 4)
|
|
return None
|
|
|
|
def handle_Shdr(mv):
|
|
sh_type, = struct.unpack_from('<I', buffer=mv, offset=sh_type_offset)
|
|
if sh_type != SHT_NOTE:
|
|
return None, None
|
|
sh_offset, = struct.unpack_from('<Q', buffer=mv, offset=sh_offset_offset)
|
|
sh_size, = struct.unpack_from('<Q', buffer=mv, offset=sh_size_offset)
|
|
return sh_offset, sh_size
|
|
|
|
def handle_elf(mv):
|
|
# \x02 is ELFCLASS64, \x01 is ELFDATA2LSB. HWASan currently only works on
|
|
# 64-bit little endian platforms (x86_64 and ARM64). If this changes, we will
|
|
# have to extend the parsing code.
|
|
if mv[:6] != b'\x7fELF\x02\x01':
|
|
return None
|
|
e_shnum, = struct.unpack_from('<H', buffer=mv, offset=e_shnum_offset)
|
|
e_shoff, = struct.unpack_from('<Q', buffer=mv, offset=e_shoff_offset)
|
|
for i in range(0, e_shnum):
|
|
start = e_shoff + i * Shdr_size
|
|
sh_offset, sh_size = handle_Shdr(mv[start: start + Shdr_size])
|
|
if sh_offset is None:
|
|
continue
|
|
note_hdr = mv[sh_offset: sh_offset + sh_size]
|
|
result = handle_Nhdr(note_hdr, sh_size)
|
|
if result is not None:
|
|
return result
|
|
|
|
def get_buildid(filename):
|
|
with open(filename, "r") as fd:
|
|
if os.fstat(fd.fileno()).st_size < Ehdr_size:
|
|
return None
|
|
with mmap.mmap(fd.fileno(), 0, access=mmap.ACCESS_READ) as m:
|
|
with memoryview(m) as mv:
|
|
return handle_elf(mv)
|
|
|
|
class Symbolizer:
|
|
def __init__(self, path, binary_prefixes, paths_to_cut):
|
|
self.__pipe = None
|
|
self.__path = path
|
|
self.__binary_prefixes = binary_prefixes
|
|
self.__paths_to_cut = paths_to_cut
|
|
self.__log = False
|
|
self.__warnings = set()
|
|
self.__index = {}
|
|
self.__link_prefixes = []
|
|
self.__html = False
|
|
self.__last_access_address = None
|
|
self.__last_access_tag = None
|
|
|
|
def enable_html(self, enable):
|
|
self.__html = enable
|
|
|
|
def enable_logging(self, enable):
|
|
self.__log = enable
|
|
|
|
def maybe_escape(self, text):
|
|
if self.__html:
|
|
# We need to manually use for leading spaces, html.escape does
|
|
# not do that, and HTML ignores them.
|
|
spaces = 0
|
|
for i, c in enumerate(text):
|
|
spaces = i
|
|
if c != ' ':
|
|
break
|
|
text = text[spaces:]
|
|
return spaces * ' ' + html.escape(text)
|
|
return text
|
|
|
|
def print(self, line, escape=True):
|
|
if escape:
|
|
line = self.maybe_escape(line)
|
|
if self.__html:
|
|
line += '<br/>'
|
|
print(line)
|
|
|
|
def read_linkify(self, filename):
|
|
with open(filename, 'r') as fd:
|
|
data = json.load(fd)
|
|
self.__link_prefixes = [(e["prefix"], e["link"]) for e in data]
|
|
|
|
def __open_pipe(self):
|
|
if not self.__pipe:
|
|
opt = {}
|
|
if sys.version_info.major > 2:
|
|
opt['encoding'] = 'utf-8'
|
|
self.__pipe = subprocess.Popen([self.__path, "--inlining", "--functions"],
|
|
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
|
|
**opt)
|
|
|
|
class __EOF(Exception):
|
|
pass
|
|
|
|
def __write(self, s):
|
|
print(s, file=self.__pipe.stdin)
|
|
self.__pipe.stdin.flush()
|
|
if self.__log:
|
|
print("#>> |%s|" % (s,), file=sys.stderr)
|
|
|
|
def __read(self):
|
|
s = self.__pipe.stdout.readline().rstrip()
|
|
if self.__log:
|
|
print("# << |%s|" % (s,), file=sys.stderr)
|
|
if s == '':
|
|
raise Symbolizer.__EOF
|
|
return s
|
|
|
|
def __process_source_path(self, file_name):
|
|
for path_to_cut in self.__paths_to_cut:
|
|
file_name = re.sub(".*" + path_to_cut, "", file_name)
|
|
file_name = re.sub(".*hwasan_[a-z_]*.(cc|h):[0-9]*", "[hwasan_rtl]", file_name)
|
|
file_name = re.sub(".*asan_[a-z_]*.(cc|h):[0-9]*", "[asan_rtl]", file_name)
|
|
file_name = re.sub(".*crtstuff.c:0", "???:0", file_name)
|
|
return file_name
|
|
|
|
def __process_binary_name(self, name, buildid):
|
|
if name.startswith('/'):
|
|
name = name[1:]
|
|
if buildid is not None and buildid in self.__index:
|
|
return self.__index[buildid]
|
|
|
|
for p in self.__binary_prefixes:
|
|
full_path = os.path.join(p, name)
|
|
if os.path.exists(full_path):
|
|
return full_path
|
|
apex_prefix = "apex/com.android."
|
|
if name.startswith(apex_prefix):
|
|
full_path = os.path.join(p, "apex/com.google.android." + name[len(apex_prefix):])
|
|
if os.path.exists(full_path):
|
|
return full_path
|
|
# Try stripping extra path components as the last resort.
|
|
for p in self.__binary_prefixes:
|
|
full_path = os.path.join(p, os.path.basename(name))
|
|
if os.path.exists(full_path):
|
|
return full_path
|
|
if name not in self.__warnings:
|
|
print("Could not find symbols for", name, file=sys.stderr)
|
|
self.__warnings.add(name)
|
|
return None
|
|
|
|
def iter_locals(self, binary, addr, buildid):
|
|
self.__open_pipe()
|
|
p = self.__pipe
|
|
binary = self.__process_binary_name(binary, buildid)
|
|
if not binary:
|
|
return
|
|
self.__write("FRAME %s %s" % (binary, addr))
|
|
try:
|
|
while True:
|
|
function_name = self.__read()
|
|
local_name = self.__read()
|
|
file_line = self.__read()
|
|
extra = self.__read().split()
|
|
|
|
file_line = self.__process_source_path(file_line)
|
|
offset = None if extra[0] == '??' else int(extra[0])
|
|
size = None if extra[1] == '??' else int(extra[1])
|
|
tag_offset = None if extra[2] == '??' else int(extra[2])
|
|
yield (function_name, file_line, local_name, offset, size, tag_offset)
|
|
except Symbolizer.__EOF:
|
|
pass
|
|
|
|
def iter_call_stack(self, binary, buildid, addr):
|
|
self.__open_pipe()
|
|
p = self.__pipe
|
|
binary = self.__process_binary_name(binary, buildid)
|
|
if not binary:
|
|
return
|
|
self.__write("CODE %s %s" % (binary, addr))
|
|
try:
|
|
while True:
|
|
function_name = self.__read()
|
|
file_line = self.__read()
|
|
file_line = self.__process_source_path(file_line)
|
|
yield (function_name, file_line)
|
|
except Symbolizer.__EOF:
|
|
pass
|
|
|
|
def maybe_linkify(self, file_line):
|
|
if not self.__html or not self.__link_prefixes:
|
|
return file_line
|
|
filename, line_col = file_line.split(':', 1)
|
|
if not line_col:
|
|
line = '0' # simplify the link generation
|
|
else:
|
|
line = line_col.split(':')[0]
|
|
longest_prefix = max((
|
|
(prefix, link) for prefix, link in self.__link_prefixes
|
|
if filename.startswith(prefix)),
|
|
key=lambda x: len(x[0]), default=None)
|
|
if longest_prefix is None:
|
|
return file_line
|
|
else:
|
|
prefix, link = longest_prefix
|
|
return '<a href="{}">{}</a>'.format(
|
|
html.escape(link.format(file=filename[len(prefix):], line=line,
|
|
file_line=file_line, prefix=prefix)), file_line)
|
|
|
|
def build_index(self):
|
|
for p in self.__binary_prefixes:
|
|
for dname, _, fnames in os.walk(p):
|
|
for fn in fnames:
|
|
filename = os.path.join(dname, fn)
|
|
try:
|
|
bid = get_buildid(filename)
|
|
except FileNotFoundError:
|
|
continue
|
|
except Exception as e:
|
|
print("Failed to parse {}: {}".format(filename, e), file=sys.stderr)
|
|
continue
|
|
if bid is not None:
|
|
self.__index[bid] = filename
|
|
|
|
def symbolize_line(self, line):
|
|
#0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) (BuildId: 4abce4cd41ea5c2f34753297b7e774d9)
|
|
match = re.match(r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]*) *\((.*)\+(0x[0-9a-f]+)\)'
|
|
r'(?:\s*\(BuildId: ([0-9a-f]+)\))?', line, re.UNICODE)
|
|
if match:
|
|
frameno = match.group(2)
|
|
binary = match.group(5)
|
|
addr = int(match.group(6), 16)
|
|
buildid = match.group(7)
|
|
|
|
frames = list(self.iter_call_stack(binary, buildid, addr))
|
|
|
|
if len(frames) > 0:
|
|
self.print(
|
|
self.maybe_escape(
|
|
"%s#%s%s%s in " % (match.group(1), match.group(2), match.group(3),
|
|
frames[0][0])
|
|
) + self.maybe_linkify(frames[0][1]),
|
|
escape=False)
|
|
for i in range(1, len(frames)):
|
|
space1 = ' ' * match.end(1)
|
|
space2 = ' ' * (match.start(4) - match.end(1) - 2)
|
|
self.print(
|
|
self.maybe_escape("%s->%s%s in " % (space1, space2, frames[i][0]))
|
|
+ self.maybe_linkify(frames[i][1]), escape=False)
|
|
else:
|
|
self.print(line.rstrip())
|
|
else:
|
|
self.print(line.rstrip())
|
|
|
|
def save_access_address(self, line):
|
|
match = re.match(r'^(.*?)HWAddressSanitizer: tag-mismatch on address (0x[0-9a-f]+) ', line, re.UNICODE)
|
|
if match:
|
|
self.__last_access_address = int(match.group(2), 16)
|
|
match = re.match(r'^(.*?) of size [0-9]+ at 0x[0-9a-f]* tags: ([0-9a-f]+)/[0-9a-f]+(\([0-9a-f]+\))? \(ptr/mem\)', line, re.UNICODE)
|
|
if match:
|
|
self.__last_access_tag = int(match.group(2), 16)
|
|
|
|
def process_stack_history(self, line, ignore_tags=False):
|
|
if self.__last_access_address is None or self.__last_access_tag is None:
|
|
return
|
|
if re.match(r'Previously allocated frames:', line, re.UNICODE):
|
|
return True
|
|
pc_mask = (1 << 48) - 1
|
|
fp_mask = (1 << 20) - 1
|
|
# record_addr:0x1234ABCD record:0x1234ABCD (/path/to/binary+0x1234ABCD) (BuildId: 4abce4cd41ea5c2f34753297b7e774d9)
|
|
match = re.match(r'^(.*?)record_addr:(0x[0-9a-f]+) +record:(0x[0-9a-f]+) +\((.*)\+(0x[0-9a-f]+)\)'
|
|
r'(?:\s*\(BuildId: ([0-9a-f]+)\))?', line, re.UNICODE)
|
|
if match:
|
|
record_addr = int(match.group(2), 16)
|
|
record = int(match.group(3), 16)
|
|
binary = match.group(4)
|
|
addr = int(match.group(5), 16)
|
|
buildid = match.group(6)
|
|
base_tag = (record_addr >> 3) & 0xFF
|
|
fp = (record >> 48) << 4
|
|
pc = record & pc_mask
|
|
|
|
for local in self.iter_locals(binary, addr, buildid):
|
|
frame_offset = local[3]
|
|
size = local[4]
|
|
if frame_offset is None or size is None:
|
|
continue
|
|
obj_offset = (self.__last_access_address - fp - frame_offset) & fp_mask
|
|
if obj_offset >= size:
|
|
continue
|
|
tag_offset = local[5]
|
|
if not ignore_tags and (tag_offset is None or base_tag ^ tag_offset != self.__last_access_tag):
|
|
continue
|
|
self.print('')
|
|
self.print('Potentially referenced stack object:')
|
|
self.print(' %d bytes inside a variable "%s" in stack frame of function "%s"' % (obj_offset, local[2], local[0]))
|
|
self.print(' at %s' % (local[1],))
|
|
return True
|
|
return False
|
|
|
|
def extract_version(s):
|
|
idx = s.rfind('-')
|
|
if idx == -1:
|
|
return 0
|
|
x = float(s[idx + 1:])
|
|
return x
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser()
|
|
parser.add_argument('-d', action='store_true')
|
|
parser.add_argument('-v', action='store_true')
|
|
parser.add_argument('--ignore-tags', action='store_true')
|
|
parser.add_argument('--symbols', action='append')
|
|
parser.add_argument('--source', action='append')
|
|
parser.add_argument('--index', action='store_true')
|
|
parser.add_argument('--symbolizer')
|
|
parser.add_argument('--linkify', type=str)
|
|
parser.add_argument('--html', action='store_true')
|
|
parser.add_argument('args', nargs=argparse.REMAINDER)
|
|
args = parser.parse_args()
|
|
|
|
# Unstripped binaries location.
|
|
binary_prefixes = args.symbols or []
|
|
if not binary_prefixes:
|
|
if 'ANDROID_PRODUCT_OUT' in os.environ:
|
|
product_out = os.path.join(os.environ['ANDROID_PRODUCT_OUT'], 'symbols')
|
|
binary_prefixes.append(product_out)
|
|
binary_prefixes.append('/')
|
|
|
|
for p in binary_prefixes:
|
|
if not os.path.isdir(p):
|
|
print("Symbols path does not exist or is not a directory:", p, file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
# Source location.
|
|
paths_to_cut = args.source or []
|
|
if not paths_to_cut:
|
|
paths_to_cut.append(os.getcwd() + '/')
|
|
if 'ANDROID_BUILD_TOP' in os.environ:
|
|
paths_to_cut.append(os.environ['ANDROID_BUILD_TOP'] + '/')
|
|
|
|
# llvm-symbolizer binary.
|
|
# 1. --symbolizer flag
|
|
# 2. environment variable
|
|
# 3. unsuffixed binary in the current directory
|
|
# 4. if inside Android platform, prebuilt binary at a known path
|
|
# 5. first "llvm-symbolizer", then "llvm-symbolizer-$VER" with the
|
|
# highest available version in $PATH
|
|
symbolizer_path = args.symbolizer
|
|
if not symbolizer_path:
|
|
if 'LLVM_SYMBOLIZER_PATH' in os.environ:
|
|
symbolizer_path = os.environ['LLVM_SYMBOLIZER_PATH']
|
|
elif 'HWASAN_SYMBOLIZER_PATH' in os.environ:
|
|
symbolizer_path = os.environ['HWASAN_SYMBOLIZER_PATH']
|
|
|
|
if not symbolizer_path:
|
|
s = os.path.join(os.path.dirname(sys.argv[0]), 'llvm-symbolizer')
|
|
if os.path.exists(s):
|
|
symbolizer_path = s
|
|
|
|
if not symbolizer_path:
|
|
if 'ANDROID_BUILD_TOP' in os.environ:
|
|
s = os.path.join(os.environ['ANDROID_BUILD_TOP'], 'prebuilts/clang/host/linux-x86/llvm-binutils-stable/llvm-symbolizer')
|
|
if os.path.exists(s):
|
|
symbolizer_path = s
|
|
|
|
if not symbolizer_path:
|
|
for path in os.environ["PATH"].split(os.pathsep):
|
|
p = os.path.join(path, 'llvm-symbolizer')
|
|
if os.path.exists(p):
|
|
symbolizer_path = p
|
|
break
|
|
|
|
if not symbolizer_path:
|
|
for path in os.environ["PATH"].split(os.pathsep):
|
|
candidates = glob.glob(os.path.join(path, 'llvm-symbolizer-*'))
|
|
if len(candidates) > 0:
|
|
candidates.sort(key = extract_version, reverse = True)
|
|
symbolizer_path = candidates[0]
|
|
break
|
|
|
|
if not os.path.exists(symbolizer_path):
|
|
print("Symbolizer path does not exist:", symbolizer_path, file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
if args.v:
|
|
print("Looking for symbols in:")
|
|
for s in binary_prefixes:
|
|
print(" %s" % (s,))
|
|
print("Stripping source path prefixes:")
|
|
for s in paths_to_cut:
|
|
print(" %s" % (s,))
|
|
print("Using llvm-symbolizer binary in:\n %s" % (symbolizer_path,))
|
|
print()
|
|
|
|
symbolizer = Symbolizer(symbolizer_path, binary_prefixes, paths_to_cut)
|
|
symbolizer.enable_html(args.html)
|
|
symbolizer.enable_logging(args.d)
|
|
if args.index:
|
|
symbolizer.build_index()
|
|
|
|
if args.linkify:
|
|
if not args.html:
|
|
print('Need --html to --linkify', file=sys.stderr)
|
|
sys.exit(1)
|
|
symbolizer.read_linkify(args.linkify)
|
|
|
|
for line in sys.stdin:
|
|
if sys.version_info.major < 3:
|
|
line = line.decode('utf-8')
|
|
symbolizer.save_access_address(line)
|
|
if symbolizer.process_stack_history(line, ignore_tags=args.ignore_tags):
|
|
continue
|
|
symbolizer.symbolize_line(line)
|
|
|
|
|
|
if __name__ == '__main__':
|
|
main()
|