Skip to content

HLIL Readability: Allow Flattening Deep Nesting From Guard Clauses #7927

@NWPlayer123

Description

@NWPlayer123

Version and Platform (required):

  • Binary Ninja Version: Binary Ninja Personal 5.3.9003-dev
  • Edition: Non-Commercial
  • OS: CachyOS / COSMIC DE
  • OS Version: Rolling Release
  • CPU Architecture: x86_64

Bug Description:
Although Binary Ninja's extremely good about eliminating gotos and resolving to sane code, it still struggles with "guard clauses" and produces extremely nested if/else statements, instead of inverting and early-returning.

Steps To Reproduce:
Load the binary, navigate to the example function in question (see below), poke around with the HLIL/Pseudo-C.

Expected Behavior:
I want readable code that's closer to the developer's intended output.

Binary:
free force deserializes grandly

Additional Information:
Here's the original Pseudo-C showing the main problem, note that set_error(ctx); return input; could be injected to massively simplify the codeflow. I did notice that you can toggle "Show Condition as Inverted", but that still leaves the else clause which adds to the amount of nesting, I want something more like a "convert to standalone block" button, or just better HLIL/Pseudo-C codegen in the first place, although I know how difficult of a problem that can be.

100018d7    char* sub_100018d7(char* arg1, struct demangle_ctx* arg2)

100018d7    {
100018d7        char* input = arg1;
100018e6        char* input_3 = nullptr;
100018e6        
100018f9        if (peek_char(input, arg2) == 0x43)
100018f9        {
100018ff            input_3 = input;
10001902            arg2->suppress += 1;
10001910            input = sub_100042b5(input, arg2);
10001912            arg2->suppress -= 1;
100018f9        }
100018f9        
10001920        char var_5 = peek_char(input, arg2);
10001920        
10001935        if (isdigit((uint32_t)var_5))
10001935        {
1000195d            emit_char(0x26, arg2);
10001952            return sub_100042e4(input, 0, arg2);
10001935        }
10001935        
1000195d        if (var_5 != 0x4c)
1000195d        {
10001cdc            if (var_5 == 0x5a)
10001cee                return parse_template_param(input, 1, arg2);
10001cee            
10001cf9            if (var_5 == 0x4f)
10001d09                return sub_10001d20(input, arg2);
10001d09            
10001d11            set_error(arg2);
1000195d        }
1000195d        else if (input_3)
10001967        {
1000198c            char* saved_end;
1000198c            int32_t length;
1000198c            
1000198c            if (peek_char(&input[1], arg2) == 0x4d)
1000198c            {
10001af7                char* input_1 = &input[2];
10001af7                
10001b10                while (isdigit((uint32_t)peek_char(input_1, arg2)))
10001b10                    input_1 = &input_1[1];
10001b10                
10001b27                input = expect_underscore(input_1, arg2);
10001b27                
10001b35                if (peek_char(input, arg2) == 0x4c)
10001b35                {
10001b4e                    char* input_2;
10001b4e                    
10001b58                    if (peek_char(&input[1], arg2) != 0x5f)
10001b89                        input_2 = set_parse_window(&input[1], &length, &saved_end, arg2);
10001b58                    else
10001b70                        input_2 =
10001b70                            parse_length_prefix(&input[1], &length, &saved_end, arg2);
10001b70                    
10001b8b                    char* input_4 = input_2;
10001b8b                    
10001ba3                    while (isdigit((uint32_t)peek_char(input_2, arg2))
10001ba3                            || peek_char(input_2, arg2) == 0x6e)
10001ba3                        input_2 = &input_2[1];
10001ba3                    
10001bc1                    arg2->end_ptr = saved_end;
10001bce                    input = expect_underscore(input_2, arg2);
10001bce                    
10001bd3                    if (*(uint8_t*)input_4 == 0x6e)
10001bd3                    {
10001bdc                        emit_char(0x26, arg2);
10001bf0                        sub_10003375(&input_3[2], 0, nullptr, arg2);
10001bfe                        emit_string("::", arg2);
10001c12                        return sub_100042e4(input, 1, arg2);
10001bd3                    }
10001bd3                    
10001c25                    if (peek_char(input, arg2) == 0x30)
10001c25                    {
10001c39                        input = &input[1];
10001c39                        
10001c49                        if (length != 1 || *(uint8_t*)input_4 != 0x30)
10001c49                        {
10001c7c                            emit_char(0x26, arg2);
10001c90                            sub_10003375(&input_3[2], 0, nullptr, arg2);
10001c9e                            emit_string("::", arg2);
10001cac                            emit_string("virtual-function-", arg2);
10001cac                            
10001cd1                            while (length > 0)
10001cd1                            {
10001cd1                                emit_char(*(uint8_t*)input_4, arg2);
10001cc6                                length -= 1;
10001cca                                input_4 = &input_4[1];
10001cd1                            }
10001c49                        }
10001c49                        else
10001c49                        {
10001c52                            emit_char(0x28, arg2);
10001c5e                            sub_100042b5(input_3, arg2);
10001c6c                            emit_string(")0", arg2);
10001c49                        }
10001c25                    }
10001c25                    else
10001c2c                        set_error(arg2);
10001b35                }
10001b35                else
10001b3c                    set_error(arg2);
1000198c            }
1000198c            else
1000198c            {
10001992                int32_t eax_9 = 0;
10001992                
100019ae                if (&input_3[2] == input && input_3[1] == 0x62)
100019b4                    eax_9 = 1;
100019b4                
100019be                if (!eax_9)
100019be                {
100019c7                    emit_char(0x28, arg2);
100019d7                    sub_100042b5(&input_3[1], arg2);
100019e2                    emit_char(0x29, arg2);
100019be                }
100019be                
100019ff                input = parse_length_prefix(&input[1], &length, &saved_end, arg2);
10001a01                int32_t var_c = 0;
10001a01                
10001aba                while (true)
10001aba                {
10001aba                    if (length <= 0)
10001aba                    {
10001ac3                        arg2->end_ptr = saved_end;
10001ac3                        
10001ac9                        if (eax_9)
10001ac9                        {
10001ad3                            char* str;
10001ad3                            
10001ad3                            str = !var_c ? "false" : "true";
10001ad3                            
10001aea                            emit_string(str, arg2);
10001ac9                        }
10001ac9                        
10001ac9                        break;
10001aba                    }
10001aba                    
10001a17                    char ch = peek_char(input, arg2);
10001a1a                    int32_t ch_1 = (int32_t)ch;
10001a1a                    
10001a41                    if (ch_1 == 0x64)
10001a7e                        ch = 0x2e;
10001a41                    else if (ch_1 == 0x6e)
10001a75                        ch = 0x2d;
10001a4a                    else if (ch_1 == 0x70)
10001a6c                        ch = 0x2b;
10001a6c                    
10001a33                    if (ch_1 == 0x5f || !ch_1)
10001a33                    {
10001a5f                        set_error(arg2);
10001a67                        break;
10001a33                    }
10001a33                    
10001a85                    if (!eax_9)
10001aa7                        emit_char(ch, arg2);
10001a85                    else if (ch != 0x30)
10001a95                        var_c = 1;
10001a95                    
10001aaf                    length -= 1;
10001ab3                    input = &input[1];
10001aba                }
1000198c            }
10001967        }
10001967        else
1000196e            set_error(arg2);
1000196e        
10001d1f        return input;
100018d7    }

Here's a hand-decompiled version that's probably a lot closer to the developer's intent:

static char* parse_expression(char* input, demangle_ctx* ctx) {
    char* cast_type = nullptr;

    if (peek_char(input, ctx) == 'C') {
        cast_type = input;
        ctx->suppress++;
        input = parse_type_for_output(input, ctx);
        ctx->suppress--;
    }

    char ch = peek_char(input, ctx);

    if (isdigit(ch)) {
        emit_char('&', ctx);
        return parse_address_literal(input, 0, ctx);
    }

    if (ch == 'L') {
        if (!cast_type) {
            set_error(ctx);
            return input;
        }

        char* saved_end;
        uint32_t length;

        if (peek_char(input + 1, ctx) == 'M') {
            input += 2;

            while (isdigit(peek_char(input, ctx))) {
                input++;
            }

            input = expect_underscore(input, ctx);

            if (peek_char(input, ctx) != 'L') {
                set_error(ctx);
                return input;
            }

            if (peek_char(input + 1, ctx) == '_') {
                input = parse_length_prefix(input + 1, &length, &saved_end, ctx);
            } else {
                input = set_parse_window(input + 1, &length, &saved_end, ctx);
            }

            // Keep an extra pointer so we can refer to earlier in the string
            char* literal_start = input;

            while (isdigit(peek_char(input, ctx)) || peek_char(input, ctx) == 'n') {
                input++;
            }

            ctx->end_ptr = saved_end;
            input = expect_underscore(input, ctx);

            if (*literal_start == 'n') {
                emit_char('&', ctx);
                parse_qualified_name(cast_type + 2, 0, nullptr, ctx);
                emit_string("::", ctx);
                return parse_address_literal(input, 1, ctx);
            }

            if (peek_char(input, ctx) != '0') {
                set_error(ctx);
                return input;
            }

            input++;
            if (length != 1 || *literal_start != '0') {
                emit_char('&', ctx);
                parse_qualified_name(cast_type + 2, 0, nullptr, ctx);
                emit_string("::", ctx);
                emit_string("virtual-function-", ctx);
                
                while (length > 0) {
                    emit_char(*literal_start, ctx);
                    length--;
                    literal_start++;
                }
            } else {
                emit_char('(', ctx);
                parse_type_for_output(cast_type, ctx);
                emit_string(")0", ctx);
            }

            return input;
        }

        bool is_bool = false;

        if (cast_type + 2 == input && cast_type[1] == 'b') {
            is_bool = true;
        }

        if (!is_bool) {
            emit_char('(', ctx);
            parse_type_for_output(cast_type + 1, ctx);
            emit_char(')', ctx);
        }

        input = parse_length_prefix(input + 1, &length, &saved_end, ctx);
        bool saw_nonzero = false;

        while (length > 0) {
            ch = peek_char(input, ctx);

            if (ch == 'd') {
                ch = '.';
            } else if (ch == 'n') {
                ch = '-';
            } else if (ch == 'p') {
                ch = '+';
            }

            if (ch == '_' || !ch) {
                set_error(ctx);
                return input;
            }

            if (!is_bool) {
                emit_char(ch, ctx);
            } else if (ch != '0') {
                saw_nonzero = true;
            }

            length--;
            input++;
        }

        ctx->end_ptr = saved_end;

        if (is_bool) {
            emit_string(saw_nonzero ? "true" : "false", ctx);
        }

        return input;
    }

    if (ch == 'Z') {
        return parse_template_param(input, true, ctx);
    }

    if (ch == 'O') {
        return parse_operator_expr(input, ctx);
    }

    set_error(ctx);
    return input;
}

On a related note, it would be nice to be able to swap between "&input[2]" and "input + 2", Binja seems to lean heavily on the former, but only in HLIL/Pseudo-C, MLIL/LLIL get it correctly, and little stuff like "if you're comparing a char to an int, check to see if it's a valid char literal like 'n'", but those are probably out of scope for this issue.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions