-
Notifications
You must be signed in to change notification settings - Fork 275
Description
Version and Platform (required):
- Binary Ninja Version: Binary Ninja Personal 5.3.9003-dev
- Edition: Non-Commercial
- OS: CachyOS / COSMIC DE
- OS Version: Rolling Release
- CPU Architecture: x86_64
Bug Description:
Although Binary Ninja's extremely good about eliminating gotos and resolving to sane code, it still struggles with "guard clauses" and produces extremely nested if/else statements, instead of inverting and early-returning.
Steps To Reproduce:
Load the binary, navigate to the example function in question (see below), poke around with the HLIL/Pseudo-C.
Expected Behavior:
I want readable code that's closer to the developer's intended output.
Binary:
free force deserializes grandly
Additional Information:
Here's the original Pseudo-C showing the main problem, note that set_error(ctx); return input; could be injected to massively simplify the codeflow. I did notice that you can toggle "Show Condition as Inverted", but that still leaves the else clause which adds to the amount of nesting, I want something more like a "convert to standalone block" button, or just better HLIL/Pseudo-C codegen in the first place, although I know how difficult of a problem that can be.
100018d7 char* sub_100018d7(char* arg1, struct demangle_ctx* arg2)
100018d7 {
100018d7 char* input = arg1;
100018e6 char* input_3 = nullptr;
100018e6
100018f9 if (peek_char(input, arg2) == 0x43)
100018f9 {
100018ff input_3 = input;
10001902 arg2->suppress += 1;
10001910 input = sub_100042b5(input, arg2);
10001912 arg2->suppress -= 1;
100018f9 }
100018f9
10001920 char var_5 = peek_char(input, arg2);
10001920
10001935 if (isdigit((uint32_t)var_5))
10001935 {
1000195d emit_char(0x26, arg2);
10001952 return sub_100042e4(input, 0, arg2);
10001935 }
10001935
1000195d if (var_5 != 0x4c)
1000195d {
10001cdc if (var_5 == 0x5a)
10001cee return parse_template_param(input, 1, arg2);
10001cee
10001cf9 if (var_5 == 0x4f)
10001d09 return sub_10001d20(input, arg2);
10001d09
10001d11 set_error(arg2);
1000195d }
1000195d else if (input_3)
10001967 {
1000198c char* saved_end;
1000198c int32_t length;
1000198c
1000198c if (peek_char(&input[1], arg2) == 0x4d)
1000198c {
10001af7 char* input_1 = &input[2];
10001af7
10001b10 while (isdigit((uint32_t)peek_char(input_1, arg2)))
10001b10 input_1 = &input_1[1];
10001b10
10001b27 input = expect_underscore(input_1, arg2);
10001b27
10001b35 if (peek_char(input, arg2) == 0x4c)
10001b35 {
10001b4e char* input_2;
10001b4e
10001b58 if (peek_char(&input[1], arg2) != 0x5f)
10001b89 input_2 = set_parse_window(&input[1], &length, &saved_end, arg2);
10001b58 else
10001b70 input_2 =
10001b70 parse_length_prefix(&input[1], &length, &saved_end, arg2);
10001b70
10001b8b char* input_4 = input_2;
10001b8b
10001ba3 while (isdigit((uint32_t)peek_char(input_2, arg2))
10001ba3 || peek_char(input_2, arg2) == 0x6e)
10001ba3 input_2 = &input_2[1];
10001ba3
10001bc1 arg2->end_ptr = saved_end;
10001bce input = expect_underscore(input_2, arg2);
10001bce
10001bd3 if (*(uint8_t*)input_4 == 0x6e)
10001bd3 {
10001bdc emit_char(0x26, arg2);
10001bf0 sub_10003375(&input_3[2], 0, nullptr, arg2);
10001bfe emit_string("::", arg2);
10001c12 return sub_100042e4(input, 1, arg2);
10001bd3 }
10001bd3
10001c25 if (peek_char(input, arg2) == 0x30)
10001c25 {
10001c39 input = &input[1];
10001c39
10001c49 if (length != 1 || *(uint8_t*)input_4 != 0x30)
10001c49 {
10001c7c emit_char(0x26, arg2);
10001c90 sub_10003375(&input_3[2], 0, nullptr, arg2);
10001c9e emit_string("::", arg2);
10001cac emit_string("virtual-function-", arg2);
10001cac
10001cd1 while (length > 0)
10001cd1 {
10001cd1 emit_char(*(uint8_t*)input_4, arg2);
10001cc6 length -= 1;
10001cca input_4 = &input_4[1];
10001cd1 }
10001c49 }
10001c49 else
10001c49 {
10001c52 emit_char(0x28, arg2);
10001c5e sub_100042b5(input_3, arg2);
10001c6c emit_string(")0", arg2);
10001c49 }
10001c25 }
10001c25 else
10001c2c set_error(arg2);
10001b35 }
10001b35 else
10001b3c set_error(arg2);
1000198c }
1000198c else
1000198c {
10001992 int32_t eax_9 = 0;
10001992
100019ae if (&input_3[2] == input && input_3[1] == 0x62)
100019b4 eax_9 = 1;
100019b4
100019be if (!eax_9)
100019be {
100019c7 emit_char(0x28, arg2);
100019d7 sub_100042b5(&input_3[1], arg2);
100019e2 emit_char(0x29, arg2);
100019be }
100019be
100019ff input = parse_length_prefix(&input[1], &length, &saved_end, arg2);
10001a01 int32_t var_c = 0;
10001a01
10001aba while (true)
10001aba {
10001aba if (length <= 0)
10001aba {
10001ac3 arg2->end_ptr = saved_end;
10001ac3
10001ac9 if (eax_9)
10001ac9 {
10001ad3 char* str;
10001ad3
10001ad3 str = !var_c ? "false" : "true";
10001ad3
10001aea emit_string(str, arg2);
10001ac9 }
10001ac9
10001ac9 break;
10001aba }
10001aba
10001a17 char ch = peek_char(input, arg2);
10001a1a int32_t ch_1 = (int32_t)ch;
10001a1a
10001a41 if (ch_1 == 0x64)
10001a7e ch = 0x2e;
10001a41 else if (ch_1 == 0x6e)
10001a75 ch = 0x2d;
10001a4a else if (ch_1 == 0x70)
10001a6c ch = 0x2b;
10001a6c
10001a33 if (ch_1 == 0x5f || !ch_1)
10001a33 {
10001a5f set_error(arg2);
10001a67 break;
10001a33 }
10001a33
10001a85 if (!eax_9)
10001aa7 emit_char(ch, arg2);
10001a85 else if (ch != 0x30)
10001a95 var_c = 1;
10001a95
10001aaf length -= 1;
10001ab3 input = &input[1];
10001aba }
1000198c }
10001967 }
10001967 else
1000196e set_error(arg2);
1000196e
10001d1f return input;
100018d7 }Here's a hand-decompiled version that's probably a lot closer to the developer's intent:
static char* parse_expression(char* input, demangle_ctx* ctx) {
char* cast_type = nullptr;
if (peek_char(input, ctx) == 'C') {
cast_type = input;
ctx->suppress++;
input = parse_type_for_output(input, ctx);
ctx->suppress--;
}
char ch = peek_char(input, ctx);
if (isdigit(ch)) {
emit_char('&', ctx);
return parse_address_literal(input, 0, ctx);
}
if (ch == 'L') {
if (!cast_type) {
set_error(ctx);
return input;
}
char* saved_end;
uint32_t length;
if (peek_char(input + 1, ctx) == 'M') {
input += 2;
while (isdigit(peek_char(input, ctx))) {
input++;
}
input = expect_underscore(input, ctx);
if (peek_char(input, ctx) != 'L') {
set_error(ctx);
return input;
}
if (peek_char(input + 1, ctx) == '_') {
input = parse_length_prefix(input + 1, &length, &saved_end, ctx);
} else {
input = set_parse_window(input + 1, &length, &saved_end, ctx);
}
// Keep an extra pointer so we can refer to earlier in the string
char* literal_start = input;
while (isdigit(peek_char(input, ctx)) || peek_char(input, ctx) == 'n') {
input++;
}
ctx->end_ptr = saved_end;
input = expect_underscore(input, ctx);
if (*literal_start == 'n') {
emit_char('&', ctx);
parse_qualified_name(cast_type + 2, 0, nullptr, ctx);
emit_string("::", ctx);
return parse_address_literal(input, 1, ctx);
}
if (peek_char(input, ctx) != '0') {
set_error(ctx);
return input;
}
input++;
if (length != 1 || *literal_start != '0') {
emit_char('&', ctx);
parse_qualified_name(cast_type + 2, 0, nullptr, ctx);
emit_string("::", ctx);
emit_string("virtual-function-", ctx);
while (length > 0) {
emit_char(*literal_start, ctx);
length--;
literal_start++;
}
} else {
emit_char('(', ctx);
parse_type_for_output(cast_type, ctx);
emit_string(")0", ctx);
}
return input;
}
bool is_bool = false;
if (cast_type + 2 == input && cast_type[1] == 'b') {
is_bool = true;
}
if (!is_bool) {
emit_char('(', ctx);
parse_type_for_output(cast_type + 1, ctx);
emit_char(')', ctx);
}
input = parse_length_prefix(input + 1, &length, &saved_end, ctx);
bool saw_nonzero = false;
while (length > 0) {
ch = peek_char(input, ctx);
if (ch == 'd') {
ch = '.';
} else if (ch == 'n') {
ch = '-';
} else if (ch == 'p') {
ch = '+';
}
if (ch == '_' || !ch) {
set_error(ctx);
return input;
}
if (!is_bool) {
emit_char(ch, ctx);
} else if (ch != '0') {
saw_nonzero = true;
}
length--;
input++;
}
ctx->end_ptr = saved_end;
if (is_bool) {
emit_string(saw_nonzero ? "true" : "false", ctx);
}
return input;
}
if (ch == 'Z') {
return parse_template_param(input, true, ctx);
}
if (ch == 'O') {
return parse_operator_expr(input, ctx);
}
set_error(ctx);
return input;
}On a related note, it would be nice to be able to swap between "&input[2]" and "input + 2", Binja seems to lean heavily on the former, but only in HLIL/Pseudo-C, MLIL/LLIL get it correctly, and little stuff like "if you're comparing a char to an int, check to see if it's a valid char literal like 'n'", but those are probably out of scope for this issue.