Restoring Function Information to Stripped Binaries Using Dyninst Emily Jacobson and Nathan Rosenblum Binary Tools Need Symbol Tables Debugging Tools GDB IDA Pro Instrumentation Tools ID: 413554
Download Presentation The PPT/PDF document "unstrip" is the property of its rightful owner. Permission is granted to download and print the materials on this web site for personal, non-commercial use only, and to display it on your personal computer provided you do not modify the materials and that you retain all copyright notices contained in the materials. By downloading content from our website, you accept the terms of this agreement.
Slide1
unstrip: Restoring Function Information to Stripped Binaries Using Dyninst
Emily Jacobson and Nathan Rosenblum
Slide2
Binary Tools Need Symbol Tables
Debugging Tools
GDB, IDA Pro, …
Instrumentation Tools
PIN, Dyninst, …
Static Analysis Tools
CodeSurfer/x86, …
Security Analysis Tools
IDA Pro, …
2
unstrip: Restoring Function Information to Stripped Binaries
Slide3
<targ8056f50>:
push %ebp
mov %esp,%ebp
sub $0x8,%esp
mov 0x8(%ebp),%eax
add $0xfffffff8,%esp
push %eax
call <targ80c3bd0>
push %eax
call <targ8057220>
mov %ebp,%esp
pop %ebp
3
unstrip: Restoring Function Information to Stripped Binaries
push %ebp
mov %esp,%ebp
sub $0x8,%esp
mov 0x8(%ebp),%eax
add $0xfffffff8,%esp
push %eax
call 80c3bd0
push %eax
call 8057220
mov %ebp,%esp
pop %ebp
unstrip
unstrip = stripped parsing + binary rewriting
Slide4
New Semantic Information
Important semantic information: the program’s interaction with the operating system (system calls)
These calls are encapsulated in wrapper functions
Library fingerprinting: identify functions based on patterns learned from exemplar libraries
4
unstrip: Restoring Function Information to Stripped BinariesSlide5
<targ8056f50>:
push %ebp
mov %esp,%ebp
sub $0x8,%esp
mov 0x8(%ebp),%eax
add $0xfffffff8,%esp
push %eax
call <targ80c3bd0>
push %eax
call <targ8057220>
mov %ebp,%esp
pop %ebp
5
unstrip: Restoring Function Information to Stripped Binaries
push %ebp
mov %esp,%ebp
sub $0x8,%esp
mov 0x8(%ebp),%eax
add $0xfffffff8,%esp
push %eax
call 80c3bd0
push %eax
call 8057220
mov %ebp,%esp
pop %ebp
unstrip = stripped parsing + binary rewriting + library fingerprinting
call <getpid>
call <kill>
unstrip
Slide6
<accept>:
mov %ebx,%edx
mov $0x66,%eax
mov $0x5,%ebx
lea 0x4(%esp),%ecx
int $0x80
mov %edx,%ebx
cmp $0xffffff83,%eax
jae 8048300
ret
mov %esi,%esi
mov $0x66,%eax
mov $0x5,%ebx
lea 0x4(%esp),%ecx
Set up system call arguments
int $0x80
Invoke a system call
mov %edx,%ebx
cmp $0xffffff83,%eax
jae 8048300
ret
Error check and return
Slide7
<accept>:
mov %ebx,%edx
mov $0x66,%eax
mov $0x5,%ebx
lea 0x4(%esp),%ecx
int $0x80
mov %edx,%ebx
cmp $0xffffff83,%eax
jae 8048300
ret
mov %esi,%esi
mov $0x66,%eax
mov $0x5,%ebx
lea 0x4(%esp),%ecx
int $0x80
mov %edx,%ebx
cmp $0xffffff83,%eax
jae 8048300
ret
<accept>:
cmpl $0x0,%gs:0xc
jne 80f669c
mov %ebx,%edx
mov $0x66,%eax
mov $0x5,%ebx
lea 0x4(%esp),%ecx
call *0x814e93c
mov %edx,%ebx
cmp $0xffffff83,%eax
jae 8048460
ret
push %esi
call libc_enable_asynccancel
mov %eax,%esi
mov %ebx,%edx
mov $0x66,%eax
mov $0x5,%ebx
lea 0x8(%esp),%ecx
call *0x8181578
mov %edx,%ebx
xchg %eax,%esi
call libc_disable_asynccancel
mov %esi,%eax
pop %esi
cmp $0xffffff83,%eax
jae syscall_error
ret
glibc 2.5 on RHEL with GCC 3.4.4
glibc 2.2.4 on RHEL
The same function can be realized in a variety of ways in the binary
<accept>:
cmpl $0x0,%gs:0xc
jne 80f669c
mov %ebx,%edx
mov $0x66,%eax
mov $0x5,%ebx
lea 0x4(%esp),%ecx
int $0x80
mov %edx,%ebx
cmp $0xffffff83,%eax
jae 8048460
ret
push %esi
call libc_enable_asynccancel
mov %eax,%esi
mov %ebx,%edx
mov $0x66,%eax
mov $0x5,%ebx
lea 0x8(%esp),%ecx
int $0x80
mov %edx,%ebx
xchg %eax,%esi
call libc_disable_asynccancel
mov %esi,%eax
pop %esi
cmp $0xffffff83,%eax
jae syscall_error
ret
glibc 2.5 on RHEL with GCC 4.1.2
Slide8
Semantic Descriptors
Instead, we’ll take a semantic approach
Record information that is likely to be invariant across multiple versions of the function
8
unstrip
: Restoring Function Information to Stripped Binaries
<accept>:
mov %ebx,%edx
mov $0x66,%eax
mov $0x5,%ebx
lea 0x4(%esp),%ecx
int $0x80
mov %edx,%ebx
cmp $0xffffff83,%eax
jae 8048300
ret
mov %esi,%esi
int $0x80
mov $0x66,%eax
mov $0x5,%ebx
{<socketcall, 5>}
Slide9
Building Semantic Descriptors
9
unstrip
: Restoring Function Information to Stripped Binaries
We parse an input binary, locate system calls and wrapper function calls, and employ dataflow analysis.
binary
reboot:
push %ebp
mov %esp,%ebp
sub $0x10,%esp
push %edi
push %ebx
mov 0x8(%ebp),%edx
mov $0xfee1dead,%edi
mov $0x28121969,%ecx
push %ebx
mov %edi,%ebx
mov $0x58,%eax
int $0x80
…
SYSTEM CALL
0x58
0x28121969
EAX EBX ECX
%edi
0xfee1dead
{<reboot, 0xfee1dead, 0x28121969>}
EAX
Slide10
unstrip
Building a Descriptor Database
10
unstrip
: Restoring Function Information to Stripped Binaries
Descriptor Database
<accept>:
mov %ebx,%edx
mov $0x66,%eax
mov $0x5,%ebx
lea 0x4(%esp),%ecx
int $0x80
…
Locate wrapper functions
Build semantic descriptors
{<socketcall, 5>}: accept
{<socketcall, 4>}: listen
{<getpid>}: getpid
…
glibc reference library
Slide11
glibc reference library
glibc reference library
glibc reference library
glibc reference library
unstrip
Building a Descriptor Database
11
unstrip: Restoring Function Information to Stripped Binaries
Descriptor Database
Build semantic descriptors
Locate wrapper functions
{<socketcall, 5>}: accept
{<socketcall, 4>}: listen
{<getpid>}: getpid
…
{<socketcall, 5>}: accept
{<socketcall, 4>}: listen
{<getpid>}: getpid
…
{<socketcall, 5>}: accept
{<socketcall, 4>}: listen
{<getpid>}: getpid
…
{<socketcall, 5>}: accept
{<socketcall, 4>}: listen
{<getpid>}: getpid
…
1
<accept>:
mov %ebx,%edx
mov $0x66,%eax
mov $0x5,%ebx
lea 0x4(%esp),%ecx
int $0x80
…
1
<accept>:
mov %ebx,%edx
mov $0x66,%eax
mov $0x5,%ebx
lea 0x4(%esp),%ecx
int $0x80
…
1
<accept>:
mov %ebx,%edx
mov $0x66,%eax
mov $0x5,%ebx
lea 0x4(%esp),%ecx
int $0x80
…
1
<accept>:
mov %ebx,%edx
mov $0x66,%eax
mov $0x5,%ebx
lea 0x4(%esp),%ecx
int $0x80
…
Slide12
Identifying Functions in a Stripped Binary
unstrip
Building a Descriptor Database
12
unstrip
: Restoring Function Information to Stripped Binaries
Build semantic
descriptors
Locate functions
Descriptor Database
glibc reference library
glibc reference library
glibc reference library
glibc reference library
1
<accept>:
mov %ebx,%edx
mov $0x66,%eax
mov $0x5,%ebx
lea 0x4(%esp),%ecx
int $0x80
…
1
<accept>:
mov %ebx,%edx
mov $0x66,%eax
mov $0x5,%ebx
lea 0x4(%esp),%ecx
int $0x80
…
1
<accept>:
mov %ebx,%edx
mov $0x66,%eax
mov $0x5,%ebx
lea 0x4(%esp),%ecx
int $0x80
…
1
<accept>:
mov %ebx,%edx
mov $0x66,%eax
mov $0x5,%ebx
lea 0x4(%esp),%ecx
int $0x80
…
{<socketcall, 5>}: accept
{<socketcall, 4>}: listen
{<getpid>}: getpid
…
{<socketcall, 5>}: accept
{<socketcall, 4>}: listen
{<getpid>}: getpid
…
{<socketcall, 5>}: accept
{<socketcall, 4>}: listen
{<getpid>}: getpid
…
{<socketcall, 5>}: accept
{<socketcall, 4>}: listen
{<getpid>}: getpid
…
Slide13
unstrip
Identifying Functions in a Stripped Binary
13
unstrip
: Restoring Function Information to Stripped Binaries
stripped binary
unstripped binary
Descriptor Database
For each wrapper function {
1. Build the semantic descriptor.
2. Search the database for a match (two stages).
3. Add label to symbol table.
}
Slide14
Evaluation
To evaluate across three dimensions of variation, we constructed three data sets:
compiler version
library version
distribution vendor
In each set, we compiled a test binary for each glibc instance, built a descriptor database, and applied unstrip and IDA Pro FLIRT
Our evaluation measure is accuracy
14
unstrip: Restoring Function Information to Stripped BinariesSlide15
Evaluation Results: Compiler Version Study
15
unstrip
: Restoring Function Information to Stripped BinariesSlide16
Evaluation Results: Library Version Study
16
unstrip
: Restoring Function Information to Stripped BinariesSlide17
Evaluation Results: Distribution Study
17
unstrip
: Restoring Function Information to Stripped BinariesSlide18
18
unstrip
: Restoring Function Information to Stripped Binaries
For full details, tech report available online
unstrip is available at:
http://www.paradyn.org/html/tools/unstrip.html
Come see the unstrip demo today at 2:00 or 2:30 (in 1260 WID/MIR)