From 52a63f9d7fb942f1413e39bf2cb1e258c0504df0 Mon Sep 17 00:00:00 2001 From: Brandon Miller Date: Thu, 5 Feb 2026 11:16:13 -0500 Subject: [PATCH 1/3] Share context between BB recovery, lifting, and disassembly Adds a GetInstructionTextWithContext callback to the architecture class that can be used to pass data from AnalyzeBasicBlocks. This same context is also supplied to LiftFunction and allows for supplying shared function and/or binary view level information across basic block analysis, function lifting, and disassembly text rendering. --- architecture.cpp | 72 +++++ binaryninjaapi.h | 33 ++ binaryninjacore.h | 10 + python/architecture.py | 658 +++++++++++++++++++++------------------ rust/src/architecture.rs | 114 +++++++ 5 files changed, 586 insertions(+), 301 deletions(-) diff --git a/architecture.cpp b/architecture.cpp index 1901438f0..c65f6b2d8 100644 --- a/architecture.cpp +++ b/architecture.cpp @@ -518,6 +518,7 @@ FunctionLifterContext::FunctionLifterContext(LowLevelILFunction* func, BNFunctio m_inlinedCalls.insert(context->inlinedCalls[i]); } + m_functionArchContext = context->functionArchContext; m_containsInlinedFunctions = context->containsInlinedFunctions; } @@ -582,6 +583,12 @@ std::set& FunctionLifterContext::GetInlinedCalls() } +void *FunctionLifterContext::GetFunctionArchContext() +{ + return m_functionArchContext; +} + + void FunctionLifterContext::SetContainsInlinedFunctions(bool value) { *m_containsInlinedFunctions = value; @@ -730,6 +737,26 @@ bool Architecture::GetInstructionTextCallback( } +bool Architecture::GetInstructionTextWithContextCallback(void* ctxt, const uint8_t* data, uint64_t addr, size_t* len, + void* context, BNInstructionTextToken** result, size_t* count) +{ + CallbackRef arch(ctxt); + + vector tokens; + bool ok = arch->GetInstructionTextWithContext(data, addr, *len, context, tokens); + if (!ok) + { + *result = nullptr; + *count = 0; + return false; + } + + *count = tokens.size(); + *result = 
InstructionTextToken::CreateInstructionTextTokenList(tokens); + return true; +} + + void Architecture::FreeInstructionTextCallback(BNInstructionTextToken* tokens, size_t count) { for (size_t i = 0; i < count; i++) @@ -772,6 +799,13 @@ bool Architecture::LiftFunctionCallback(void* ctxt, BNLowLevelILFunction* functi } +void Architecture::FreeFunctionArchContextCallback(void* ctxt, void* context) +{ + CallbackRef arch(ctxt); + arch->FreeFunctionArchContext(context); +} + + char* Architecture::GetRegisterNameCallback(void* ctxt, uint32_t reg) { CallbackRef arch(ctxt); @@ -1260,10 +1294,12 @@ void Architecture::Register(Architecture* arch) callbacks.getAssociatedArchitectureByAddress = GetAssociatedArchitectureByAddressCallback; callbacks.getInstructionInfo = GetInstructionInfoCallback; callbacks.getInstructionText = GetInstructionTextCallback; + callbacks.getInstructionTextWithContext = GetInstructionTextWithContextCallback; callbacks.freeInstructionText = FreeInstructionTextCallback; callbacks.getInstructionLowLevelIL = GetInstructionLowLevelILCallback; callbacks.analyzeBasicBlocks = AnalyzeBasicBlocksCallback; callbacks.liftFunction = LiftFunctionCallback; + callbacks.freeFunctionArchContext = FreeFunctionArchContextCallback; callbacks.getRegisterName = GetRegisterNameCallback; callbacks.getFlagName = GetFlagNameCallback; callbacks.getFlagWriteTypeName = GetFlagWriteTypeNameCallback; @@ -1404,6 +1440,16 @@ bool Architecture::LiftFunction(LowLevelILFunction* function, FunctionLifterCont } +bool Architecture::GetInstructionTextWithContext( + const uint8_t* data, uint64_t addr, size_t& len, void* context, std::vector& result) +{ + return GetInstructionText(data, addr, len, result); +} + + +void Architecture::FreeFunctionArchContext(void* context) {} + + string Architecture::GetRegisterName(uint32_t reg) { return fmt::format("r{}", reg); @@ -1959,6 +2005,19 @@ bool CoreArchitecture::GetInstructionText( } +bool CoreArchitecture::GetInstructionTextWithContext( + const 
uint8_t* data, uint64_t addr, size_t& len, void* context, std::vector& result) +{ + BNInstructionTextToken* tokens = nullptr; + size_t count = 0; + if (!BNGetInstructionTextWithContext(m_object, data, addr, &len, context, &tokens, &count)) + return false; + + result = InstructionTextToken::ConvertAndFreeInstructionTextTokenList(tokens, count); + return true; +} + + bool CoreArchitecture::GetInstructionLowLevelIL(const uint8_t* data, uint64_t addr, size_t& len, LowLevelILFunction& il) { return BNGetInstructionLowLevelIL(m_object, data, addr, &len, il.GetObject()); @@ -1977,6 +2036,12 @@ bool CoreArchitecture::LiftFunction(LowLevelILFunction* function, FunctionLifter } +void CoreArchitecture::FreeFunctionArchContext(void* context) +{ + BNArchitectureFreeFunctionArchContext(m_object, context); +} + + string CoreArchitecture::GetRegisterName(uint32_t reg) { char* name = BNGetArchitectureRegisterName(m_object, reg); @@ -2487,6 +2552,13 @@ bool ArchitectureExtension::GetInstructionText( } +bool ArchitectureExtension::GetInstructionTextWithContext( + const uint8_t* data, uint64_t addr, size_t& len, void* context, vector& result) +{ + return m_base->GetInstructionTextWithContext(data, addr, len, context, result); +} + + bool ArchitectureExtension::GetInstructionLowLevelIL( const uint8_t* data, uint64_t addr, size_t& len, LowLevelILFunction& il) { diff --git a/binaryninjaapi.h b/binaryninjaapi.h index 5c73fe9a0..ca73e3915 100644 --- a/binaryninjaapi.h +++ b/binaryninjaapi.h @@ -9400,6 +9400,9 @@ namespace BinaryNinja { std::set& GetHaltedDisassemblyAddresses(); std::map& GetInlinedUnresolvedIndirectBranches(); + void* GetFunctionArchContext() { return m_context->functionArchContext; } + void SetFunctionArchContext(void* context) { m_context->functionArchContext = context; } + void AddTempOutgoingReference(Function* targetFunc); Ref CreateBasicBlock(Architecture* arch, uint64_t start); @@ -9422,6 +9425,7 @@ namespace BinaryNinja { std::map> m_autoIndirectBranches; std::set 
m_inlinedCalls; bool* m_containsInlinedFunctions; + void* m_functionArchContext; public: BNFunctionLifterContext* m_context; @@ -9437,6 +9441,7 @@ namespace BinaryNinja { std::map>& GetAutoIndirectBranches(); std::set& GetInlinedCalls(); void SetContainsInlinedFunctions(bool value); + void* GetFunctionArchContext(); void CheckForInlinedCall(BasicBlock* block, size_t instrCountBefore, size_t instrCountAfter, uint64_t prevAddr, uint64_t addr, const uint8_t* opcode, size_t len, @@ -9470,11 +9475,14 @@ namespace BinaryNinja { void* ctxt, const uint8_t* data, uint64_t addr, size_t maxLen, BNInstructionInfo* result); static bool GetInstructionTextCallback(void* ctxt, const uint8_t* data, uint64_t addr, size_t* len, BNInstructionTextToken** result, size_t* count); + static bool GetInstructionTextWithContextCallback(void* ctxt, const uint8_t* data, uint64_t addr, size_t* len, + void* context, BNInstructionTextToken** result, size_t* count); static void FreeInstructionTextCallback(BNInstructionTextToken* tokens, size_t count); static bool GetInstructionLowLevelILCallback( void* ctxt, const uint8_t* data, uint64_t addr, size_t* len, BNLowLevelILFunction* il); static void AnalyzeBasicBlocksCallback(void *ctxt, BNFunction* function, BNBasicBlockAnalysisContext* context); static bool LiftFunctionCallback(void* ctxt, BNLowLevelILFunction* function, BNFunctionLifterContext* context); + static void FreeFunctionArchContextCallback(void* ctxt, void* context); static char* GetRegisterNameCallback(void* ctxt, uint32_t reg); static char* GetFlagNameCallback(void* ctxt, uint32_t flag); static char* GetFlagWriteTypeNameCallback(void* ctxt, uint32_t flags); @@ -9652,6 +9660,20 @@ namespace BinaryNinja { virtual bool GetInstructionText( const uint8_t* data, uint64_t addr, size_t& len, std::vector& result) = 0; + /*! 
Retrieves a list of InstructionTextTokens while supplying contextual information + + \note Architecture subclasses can implement this method to provide contextual information from AnalyzeBasicBlocks + + \param[in] data pointer to the instruction data to retrieve text for + \param[in] addr address of the instruction data to retrieve text for + \param[out] len will be written to with the length of the instruction data which was translated + \param[in] context context to use when retrieving instruction text + \param[out] result list of InstructionTextTokens produced for the instruction + \return Whether instruction text was successfully retrieved. + */ + virtual bool GetInstructionTextWithContext(const uint8_t* data, uint64_t addr, size_t& len, void* context, + std::vector& result); + /*! Translates an instruction at addr and appends it onto the LowLevelILFunction& il. \note Architecture subclasses should implement this method. @@ -9678,6 +9700,12 @@ namespace BinaryNinja { */ virtual bool LiftFunction(LowLevelILFunction* function, FunctionLifterContext& context); + /*! Free the function architecture context + + \param context Function architecture context + */ + virtual void FreeFunctionArchContext(void* context); + /*! Gets a register name from a register index. 
\param reg Register index @@ -10072,10 +10100,13 @@ namespace BinaryNinja { const uint8_t* data, uint64_t addr, size_t maxLen, InstructionInfo& result) override; virtual bool GetInstructionText( const uint8_t* data, uint64_t addr, size_t& len, std::vector& result) override; + virtual bool GetInstructionTextWithContext(const uint8_t* data, uint64_t addr, size_t& len, void* context, + std::vector& result) override; virtual bool GetInstructionLowLevelIL( const uint8_t* data, uint64_t addr, size_t& len, LowLevelILFunction& il) override; virtual void AnalyzeBasicBlocks(Function* function, BasicBlockAnalysisContext& context) override; virtual bool LiftFunction(LowLevelILFunction* function, FunctionLifterContext& context) override; + virtual void FreeFunctionArchContext(void* context) override; virtual std::string GetRegisterName(uint32_t reg) override; virtual std::string GetFlagName(uint32_t flag) override; virtual std::string GetFlagWriteTypeName(uint32_t flags) override; @@ -10159,6 +10190,8 @@ namespace BinaryNinja { const uint8_t* data, uint64_t addr, size_t maxLen, InstructionInfo& result) override; virtual bool GetInstructionText( const uint8_t* data, uint64_t addr, size_t& len, std::vector& result) override; + virtual bool GetInstructionTextWithContext(const uint8_t* data, uint64_t addr, size_t& len, void* context, + std::vector& result) override; virtual bool GetInstructionLowLevelIL( const uint8_t* data, uint64_t addr, size_t& len, LowLevelILFunction& il) override; virtual std::string GetRegisterName(uint32_t reg) override; diff --git a/binaryninjacore.h b/binaryninjacore.h index 228b1745d..363d825b2 100644 --- a/binaryninjacore.h +++ b/binaryninjacore.h @@ -2049,6 +2049,8 @@ extern "C" size_t inlinedUnresolvedIndirectBranchCount; BNArchitectureAndAddress* inlinedUnresolvedIndirectBranches; + + void* functionArchContext; } BNBasicBlockAnalysisContext; typedef struct BNFunctionLifterContext { @@ -2075,6 +2077,8 @@ extern "C" size_t inlinedCallsCount; uint64_t* 
inlinedCalls; + void* functionArchContext; + // OUT bool* containsInlinedFunctions; } BNFunctionLifterContext; @@ -2094,11 +2098,14 @@ extern "C" void* ctxt, const uint8_t* data, uint64_t addr, size_t maxLen, BNInstructionInfo* result); bool (*getInstructionText)(void* ctxt, const uint8_t* data, uint64_t addr, size_t* len, BNInstructionTextToken** result, size_t* count); + bool (*getInstructionTextWithContext)(void* ctxt, const uint8_t* data, uint64_t addr, size_t* len, + void* context, BNInstructionTextToken** result, size_t* count); void (*freeInstructionText)(BNInstructionTextToken* tokens, size_t count); bool (*getInstructionLowLevelIL)( void* ctxt, const uint8_t* data, uint64_t addr, size_t* len, BNLowLevelILFunction* il); void (*analyzeBasicBlocks)(void* ctxt, BNFunction* function, BNBasicBlockAnalysisContext* context); bool (*liftFunction)(void *ctext, BNLowLevelILFunction* function, BNFunctionLifterContext* context); + void (*freeFunctionArchContext)(void *ctxt, void* context); char* (*getRegisterName)(void* ctxt, uint32_t reg); char* (*getFlagName)(void* ctxt, uint32_t flag); char* (*getFlagWriteTypeName)(void* ctxt, uint32_t flags); @@ -4931,6 +4938,8 @@ extern "C" BNArchitecture* arch, const uint8_t* data, uint64_t addr, size_t maxLen, BNInstructionInfo* result); BINARYNINJACOREAPI bool BNGetInstructionText(BNArchitecture* arch, const uint8_t* data, uint64_t addr, size_t* len, BNInstructionTextToken** result, size_t* count); + BINARYNINJACOREAPI bool BNGetInstructionTextWithContext(BNArchitecture* arch, const uint8_t* data, uint64_t addr, size_t* len, + void* context, BNInstructionTextToken** result, size_t* count); BINARYNINJACOREAPI bool BNGetInstructionLowLevelIL( BNArchitecture* arch, const uint8_t* data, uint64_t addr, size_t* len, BNLowLevelILFunction* il); BINARYNINJACOREAPI void BNFreeInstructionText(BNInstructionTextToken* tokens, size_t count); @@ -4942,6 +4951,7 @@ extern "C" BINARYNINJACOREAPI bool 
BNArchitectureDefaultLiftFunction(BNLowLevelILFunction* function, BNFunctionLifterContext* context); BINARYNINJACOREAPI bool BNArchitectureLiftFunction(BNArchitecture* arch, BNLowLevelILFunction* function, BNFunctionLifterContext* context); + BINARYNINJACOREAPI void BNArchitectureFreeFunctionArchContext(BNArchitecture* arch, void* context); BINARYNINJACOREAPI void BNFreeInstructionTextLines(BNInstructionTextLine* lines, size_t count); BINARYNINJACOREAPI char* BNGetArchitectureRegisterName(BNArchitecture* arch, uint32_t reg); BINARYNINJACOREAPI char* BNGetArchitectureFlagName(BNArchitecture* arch, uint32_t flag); diff --git a/python/architecture.py b/python/architecture.py index c941af8cc..8b9a5491c 100644 --- a/python/architecture.py +++ b/python/architecture.py @@ -69,348 +69,361 @@ @dataclass class BasicBlockAnalysisContext: - """Used by ``analyze_basic_blocks`` and contains analysis settings and other contextual information. + """Used by ``analyze_basic_blocks`` and contains analysis settings and other contextual information. .. 
note:: This class is meant to be used by Architecture plugins only """ - _handle: core.BNBasicBlockAnalysisContext - _function: "function.Function" - _contextual_returns_dirty: bool - - # In - _indirect_branches: List["variable.IndirectBranchInfo"] - _indirect_no_return_calls: Set["function.ArchAndAddr"] - _analysis_skip_override: core.FunctionAnalysisSkipOverride - _guided_analysis_mode: bool - _trigger_guided_on_invalid_instruction: bool - _translate_tail_calls: bool - _disallow_branch_to_string: bool - _max_function_size: int - - # In/Out - _max_size_reached: bool - _contextual_returns: Dict["function.ArchAndAddr", bool] - - # Out - _direct_code_references: Dict[int, "function.ArchAndAddr"] - _direct_no_return_calls: Set["function.ArchAndAddr"] - _halted_disassembly_addresses: Set["function.ArchAndAddr"] - - @staticmethod - def from_core_struct(bn_bb_context: core.BNBasicBlockAnalysisContext) -> "BasicBlockAnalysisContext": - """Create a BasicBlockAnalysisContext from a core.BNBasicBlockAnalysisContext structure.""" - - indirect_branches = [] - for i in range(0, bn_bb_context.indirectBranchesCount): - ibi = variable.IndirectBranchInfo( - source_arch=CoreArchitecture._from_cache(bn_bb_context.indirectBranches[i].sourceArch), - source_addr=bn_bb_context.indirectBranches[i].sourceAddr, - dest_arch=CoreArchitecture._from_cache(bn_bb_context.indirectBranches[i].destArch), - dest_addr=bn_bb_context.indirectBranches[i].destAddr, - auto_defined=bn_bb_context.indirectBranches[i].autoDefined, - ) - indirect_branches.append(ibi) - - indirect_no_return_calls = set() - for i in range(0, bn_bb_context.indirectNoReturnCallsCount): - loc = function.ArchAndAddr( - CoreArchitecture._from_cache(bn_bb_context.indirectNoReturnCalls[i].arch), - bn_bb_context.indirectNoReturnCalls[i].address, - ) - indirect_no_return_calls.add(loc) - - contextual_returns = {} - for i in range(0, bn_bb_context.contextualFunctionReturnCount): - loc = function.ArchAndAddr( - 
CoreArchitecture._from_cache(bn_bb_context.contextualFunctionReturnLocations[i].arch), - bn_bb_context.contextualFunctionReturnLocations[i].address, - ) - contextual_returns[loc] = bn_bb_context._contextualFunctionReturnValues[i] - - direct_code_references = {} - for i in range(0, bn_bb_context.directRefCount): - src = function.ArchAndAddr( - CoreArchitecture._from_cache(bn_bb_context.directRefSources[i].arch), - bn_bb_context.directRefSources[i].address, - ) - direct_code_references[bn_bb_context.directRefTargets[i]] = src - - direct_no_return_calls = set() - for i in range(0, bn_bb_context.directNoReturnCallsCount): - loc = function.ArchAndAddr( - CoreArchitecture._from_cache(bn_bb_context.directNoReturnCallLocations[i].arch), - bn_bb_context.directNoReturnCallLocations[i].address, - ) - direct_no_return_calls.add(loc) - - halted_disassembly_addresses = set() - for i in range(0, bn_bb_context.haltedDisassemblyAddressesCount): - addr = function.ArchAndAddr( - CoreArchitecture._from_cache(bn_bb_context.haltedDisassemblyAddresses[i].arch), - bn_bb_context.haltedDisassemblyAddresses[i].address, - ) - halted_disassembly_addresses.add(addr) - - view = binaryview.BinaryView(handle=core.BNGetFunctionData(bn_bb_context.function)) - return BasicBlockAnalysisContext( - _handle=bn_bb_context, - _function=function.Function(view, core.BNNewFunctionReference(bn_bb_context.function)), - _indirect_branches=indirect_branches, - _indirect_no_return_calls=indirect_no_return_calls, - _analysis_skip_override=bn_bb_context.analysisSkipOverride, - _guided_analysis_mode=bn_bb_context.guidedAnalysisMode, - _trigger_guided_on_invalid_instruction=bn_bb_context.triggerGuidedOnInvalidInstruction, - _translate_tail_calls=bn_bb_context.translateTailCalls, - _disallow_branch_to_string=bn_bb_context.disallowBranchToString, - _max_function_size=bn_bb_context.maxFunctionSize, - _max_size_reached=bn_bb_context.maxSizeReached, - _contextual_returns=contextual_returns, - 
_contextual_returns_dirty=False, - _direct_code_references=direct_code_references, - _direct_no_return_calls=direct_no_return_calls, - _halted_disassembly_addresses=halted_disassembly_addresses, - ) - - @property - def indirect_branches(self) -> List["variable.IndirectBranchInfo"]: - """Get the list of indirect branches in this context.""" - - return self._indirect_branches - - @property - def indirect_no_return_calls(self) -> Set["function.ArchAndAddr"]: - """Get the set of indirect no-return calls in this context.""" - - return self._indirect_no_return_calls - - @property - def analysis_skip_override(self) -> core.FunctionAnalysisSkipOverride: - """Get the analysis skip override setting for this context.""" - - return self._analysis_skip_override - - @property - def guided_analysis_mode(self) -> bool: - """Get the setting that determines if functions start in guided analysis mode.""" - - return self._guided_analysis_mode - - @property - def trigger_guided_on_invalid_instruction(self) -> bool: - """Get the setting that determines if guided mode should be triggered on invalid instructions.""" - - return self._trigger_guided_on_invalid_instruction - - @property - def translate_tail_calls(self) -> bool: - """Get setting from context that determines if tail calls should be translated.""" - - return self._translate_tail_calls - - @property - def disallow_branch_to_string(self) -> bool: - """Get setting from context that determines if branches to string addresses should be disallowed.""" - - return self._disallow_branch_to_string - - @property - def max_function_size(self) -> int: - """Get the maximum function size setting for this context.""" - - return self._max_function_size - - @property - def halt_on_invalid_instruction(self) -> bool: - """Get the setting from context that determines if analysis should halt on invalid instructions.""" - - return self._halt_on_invalid_instruction - - @property - def max_size_reached(self) -> bool: - """Get boolean that indicates if 
the maximum function size has been reached.""" - - return self._max_size_reached - - @max_size_reached.setter - def max_size_reached(self, value: bool) -> None: - """Set boolean that indicates if the maximum function size has been reached. + _handle: core.BNBasicBlockAnalysisContext + _function: "function.Function" + _contextual_returns_dirty: bool + + # In + _indirect_branches: List["variable.IndirectBranchInfo"] + _indirect_no_return_calls: Set["function.ArchAndAddr"] + _analysis_skip_override: core.FunctionAnalysisSkipOverride + _guided_analysis_mode: bool + _trigger_guided_on_invalid_instruction: bool + _translate_tail_calls: bool + _disallow_branch_to_string: bool + _max_function_size: int + + # In/Out + _max_size_reached: bool + _contextual_returns: Dict["function.ArchAndAddr", bool] + + # Out + _direct_code_references: Dict[int, "function.ArchAndAddr"] + _direct_no_return_calls: Set["function.ArchAndAddr"] + _halted_disassembly_addresses: Set["function.ArchAndAddr"] + _function_arch_context_token: int + + @staticmethod + def from_core_struct(bn_bb_context: core.BNBasicBlockAnalysisContext) -> "BasicBlockAnalysisContext": + """Create a BasicBlockAnalysisContext from a core.BNBasicBlockAnalysisContext structure.""" + + indirect_branches = [] + for i in range(0, bn_bb_context.indirectBranchesCount): + ibi = variable.IndirectBranchInfo( + source_arch=CoreArchitecture._from_cache(bn_bb_context.indirectBranches[i].sourceArch), + source_addr=bn_bb_context.indirectBranches[i].sourceAddr, + dest_arch=CoreArchitecture._from_cache(bn_bb_context.indirectBranches[i].destArch), + dest_addr=bn_bb_context.indirectBranches[i].destAddr, + auto_defined=bn_bb_context.indirectBranches[i].autoDefined, + ) + indirect_branches.append(ibi) + + indirect_no_return_calls = set() + for i in range(0, bn_bb_context.indirectNoReturnCallsCount): + loc = function.ArchAndAddr( + CoreArchitecture._from_cache(bn_bb_context.indirectNoReturnCalls[i].arch), + 
bn_bb_context.indirectNoReturnCalls[i].address, + ) + indirect_no_return_calls.add(loc) + + contextual_returns = {} + for i in range(0, bn_bb_context.contextualFunctionReturnCount): + loc = function.ArchAndAddr( + CoreArchitecture._from_cache(bn_bb_context.contextualFunctionReturnLocations[i].arch), + bn_bb_context.contextualFunctionReturnLocations[i].address, + ) + contextual_returns[loc] = bn_bb_context._contextualFunctionReturnValues[i] + + direct_code_references = {} + for i in range(0, bn_bb_context.directRefCount): + src = function.ArchAndAddr( + CoreArchitecture._from_cache(bn_bb_context.directRefSources[i].arch), + bn_bb_context.directRefSources[i].address, + ) + direct_code_references[bn_bb_context.directRefTargets[i]] = src + + direct_no_return_calls = set() + for i in range(0, bn_bb_context.directNoReturnCallsCount): + loc = function.ArchAndAddr( + CoreArchitecture._from_cache(bn_bb_context.directNoReturnCallLocations[i].arch), + bn_bb_context.directNoReturnCallLocations[i].address, + ) + direct_no_return_calls.add(loc) + + halted_disassembly_addresses = set() + for i in range(0, bn_bb_context.haltedDisassemblyAddressesCount): + addr = function.ArchAndAddr( + CoreArchitecture._from_cache(bn_bb_context.haltedDisassemblyAddresses[i].arch), + bn_bb_context.haltedDisassemblyAddresses[i].address, + ) + halted_disassembly_addresses.add(addr) + + view = binaryview.BinaryView(handle=core.BNGetFunctionData(bn_bb_context.function)) + return BasicBlockAnalysisContext( + _handle=bn_bb_context, + _function=function.Function(view, core.BNNewFunctionReference(bn_bb_context.function)), + _indirect_branches=indirect_branches, _indirect_no_return_calls=indirect_no_return_calls, + _analysis_skip_override=bn_bb_context.analysisSkipOverride, + _guided_analysis_mode=bn_bb_context.guidedAnalysisMode, + _trigger_guided_on_invalid_instruction=bn_bb_context.triggerGuidedOnInvalidInstruction, + _translate_tail_calls=bn_bb_context.translateTailCalls, + 
_disallow_branch_to_string=bn_bb_context.disallowBranchToString, + _max_function_size=bn_bb_context.maxFunctionSize, _max_size_reached=bn_bb_context.maxSizeReached, + _contextual_returns=contextual_returns, _contextual_returns_dirty=False, + _direct_code_references=direct_code_references, _direct_no_return_calls=direct_no_return_calls, + _halted_disassembly_addresses=halted_disassembly_addresses, + _function_arch_context_token=bn_bb_context.functionArchContext, + ) + + @property + def indirect_branches(self) -> List["variable.IndirectBranchInfo"]: + """Get the list of indirect branches in this context.""" + + return self._indirect_branches + + @property + def indirect_no_return_calls(self) -> Set["function.ArchAndAddr"]: + """Get the set of indirect no-return calls in this context.""" + + return self._indirect_no_return_calls + + @property + def analysis_skip_override(self) -> core.FunctionAnalysisSkipOverride: + """Get the analysis skip override setting for this context.""" + + return self._analysis_skip_override + + @property + def guided_analysis_mode(self) -> bool: + """Get the setting that determines if functions start in guided analysis mode.""" + + return self._guided_analysis_mode + + @property + def trigger_guided_on_invalid_instruction(self) -> bool: + """Get the setting that determines if guided mode should be triggered on invalid instructions.""" + + return self._trigger_guided_on_invalid_instruction + + @property + def translate_tail_calls(self) -> bool: + """Get setting from context that determines if tail calls should be translated.""" + + return self._translate_tail_calls + + @property + def disallow_branch_to_string(self) -> bool: + """Get setting from context that determines if branches to string addresses should be disallowed.""" + + return self._disallow_branch_to_string + + @property + def max_function_size(self) -> int: + """Get the maximum function size setting for this context.""" + + return self._max_function_size + + @property + def 
halt_on_invalid_instruction(self) -> bool: + """Get the setting from context that determines if analysis should halt on invalid instructions.""" + + return self._halt_on_invalid_instruction + + @property + def max_size_reached(self) -> bool: + """Get boolean that indicates if the maximum function size has been reached.""" + + return self._max_size_reached + + @max_size_reached.setter + def max_size_reached(self, value: bool) -> None: + """Set boolean that indicates if the maximum function size has been reached. :param bool value: The new value for max_size_reached """ - if not isinstance(value, bool): - raise TypeError("value must be a boolean") + if not isinstance(value, bool): + raise TypeError("value must be a boolean") - self._max_size_reached = value + self._max_size_reached = value - @property - def contextual_returns(self) -> Dict["function.ArchAndAddr", bool]: - """Get the mapping of contextual function return locations to their values.""" + @property + def contextual_returns(self) -> Dict["function.ArchAndAddr", bool]: + """Get the mapping of contextual function return locations to their values.""" - return self._contextual_returns + return self._contextual_returns - def add_contextual_return(self, loc: "function.ArchAndAddr", value: bool) -> None: - """ + def add_contextual_return(self, loc: "function.ArchAndAddr", value: bool) -> None: + """ ``add_contextual_return`` adds a contextual function return location and its value to the current function. 
:param function.ArchAndAddr loc: The location of the contextual function return :param bool value: The value of the contextual function return """ - if not isinstance(value, bool): - raise TypeError("value must be a boolean") + if not isinstance(value, bool): + raise TypeError("value must be a boolean") - if not isinstance(loc, function.ArchAndAddr): - raise TypeError("loc must be an instance of function.ArchAndAddr") + if not isinstance(loc, function.ArchAndAddr): + raise TypeError("loc must be an instance of function.ArchAndAddr") - # Update existing value if it exists - if loc in self._contextual_returns: - if self._contextual_returns[loc] == value: - return + # Update existing value if it exists + if loc in self._contextual_returns: + if self._contextual_returns[loc] == value: + return - self._contextual_returns[loc] = value - self._contextual_returns_dirty = True + self._contextual_returns[loc] = value + self._contextual_returns_dirty = True - @property - def direct_code_references(self) -> Dict[int, "function.ArchAndAddr"]: - """Get the mapping of direct code reference targets to their source locations.""" + @property + def direct_code_references(self) -> Dict[int, "function.ArchAndAddr"]: + """Get the mapping of direct code reference targets to their source locations.""" - return self._direct_code_references + return self._direct_code_references - def add_direct_code_reference(self, target: int, source: "function.ArchAndAddr") -> None: - """ + def add_direct_code_reference(self, target: int, source: "function.ArchAndAddr") -> None: + """ ``add_direct_code_reference`` adds a direct code reference to the current function. 
:param int target: The target address of the direct code reference :param function.ArchAndAddr source: The source location of the direct code reference """ - if not isinstance(target, int): - raise TypeError("target must be an integer") + if not isinstance(target, int): + raise TypeError("target must be an integer") - if not isinstance(source, function.ArchAndAddr): - raise TypeError("source must be an instance of function.ArchAndAddr") + if not isinstance(source, function.ArchAndAddr): + raise TypeError("source must be an instance of function.ArchAndAddr") - self._direct_code_references[target] = source + self._direct_code_references[target] = source - @property - def direct_no_return_calls(self) -> Set["function.ArchAndAddr"]: - """Get the set of direct no-return call locations in this context.""" + @property + def direct_no_return_calls(self) -> Set["function.ArchAndAddr"]: + """Get the set of direct no-return call locations in this context.""" - return self._direct_no_return_calls + return self._direct_no_return_calls - def add_direct_no_return_call(self, loc: "function.ArchAndAddr") -> None: - """ + def add_direct_no_return_call(self, loc: "function.ArchAndAddr") -> None: + """ ``add_direct_no_return_call`` adds a direct no-return call location to the current function. 
:param function.ArchAndAddr loc: The location of the direct no-return call """ - if not isinstance(loc, function.ArchAndAddr): - raise TypeError("loc must be an instance of function.ArchAndAddr") + if not isinstance(loc, function.ArchAndAddr): + raise TypeError("loc must be an instance of function.ArchAndAddr") - self._direct_no_return_calls.add(loc) + self._direct_no_return_calls.add(loc) - @property - def halted_disassembly_addresses(self) -> Set["function.ArchAndAddr"]: - """Get the set of addresses where disassembly has been halted.""" + @property + def halted_disassembly_addresses(self) -> Set["function.ArchAndAddr"]: + """Get the set of addresses where disassembly has been halted.""" - return self._halted_disassembly_addresses + return self._halted_disassembly_addresses - def add_halted_disassembly_address(self, loc: "function.ArchAndAddr") -> None: - """ + def add_halted_disassembly_address(self, loc: "function.ArchAndAddr") -> None: + """ ``add_halted_disassembly_address`` adds an address to the set of halted disassembly addresses. 
:param function.ArchAndAddr loc: The location of the halted disassembly address """ - if not isinstance(loc, function.ArchAndAddr): - raise TypeError("loc must be an instance of function.ArchAndAddr") + if not isinstance(loc, function.ArchAndAddr): + raise TypeError("loc must be an instance of function.ArchAndAddr") - self._halted_disassembly_addresses.add(loc) + self._halted_disassembly_addresses.add(loc) - def create_basic_block(self, arch: "Architecture", start: int) -> Optional["basicblock.BasicBlock"]: - """ + @property + def function_arch_context(self) -> Any: + """Get the function architecture context""" + + return self._function.arch.function_arch_contexts.get(self._function_arch_context_token, None) + + @function_arch_context.setter + def function_arch_context(self, value: Any) -> None: + """Set the function architecture context""" + + token = id(self._function) + self._function.arch.function_arch_contexts[token] = value + self._function_arch_context_token = token + + def create_basic_block(self, arch: "Architecture", start: int) -> Optional["basicblock.BasicBlock"]: + """ ``create_basic_block`` creates a new BasicBlock at the specified address for the given Architecture. 
:param Architecture arch: Architecture of the BasicBlock to create :param int start: Address of the BasicBlock to create """ - if not isinstance(arch, Architecture): - raise TypeError("arch must be an instance of architecture.Architecture") + if not isinstance(arch, Architecture): + raise TypeError("arch must be an instance of architecture.Architecture") - bnblock = core.BNAnalyzeBasicBlocksContextCreateBasicBlock(self._handle, arch.handle, start) - if not bnblock: - return None + bnblock = core.BNAnalyzeBasicBlocksContextCreateBasicBlock(self._handle, arch.handle, start) + if not bnblock: + return None - view = binaryview.BinaryView(handle=core.BNGetFunctionData(self._function.handle)) - return basicblock.BasicBlock(bnblock, view) + view = binaryview.BinaryView(handle=core.BNGetFunctionData(self._function.handle)) + return basicblock.BasicBlock(bnblock, view) - def add_basic_block(self, block: "basicblock.BasicBlock") -> None: - """ + def add_basic_block(self, block: "basicblock.BasicBlock") -> None: + """ ``add_basic_block`` adds a BasicBlock to the current function. :param basicblock.BasicBlock block: The BasicBlock to add """ - if not isinstance(block, basicblock.BasicBlock): - raise TypeError("block must be an instance of basicblock.BasicBlock") + if not isinstance(block, basicblock.BasicBlock): + raise TypeError("block must be an instance of basicblock.BasicBlock") - core.BNAnalyzeBasicBlocksContextAddBasicBlockToFunction(self._handle, block.handle) + core.BNAnalyzeBasicBlocksContextAddBasicBlockToFunction(self._handle, block.handle) - def add_temp_outgoing_reference(self, target: "function.Function") -> None: - """ + def add_temp_outgoing_reference(self, target: "function.Function") -> None: + """ ``add_temp_outgoing_reference`` adds a temporary outgoing reference to the specified function. 
:param function.Function target: The target function to add a temporary outgoing reference to """ - if not isinstance(target, function.Function): - raise TypeError("target must be an instance of function.Function") + if not isinstance(target, function.Function): + raise TypeError("target must be an instance of function.Function") - core.BNAnalyzeBasicBlocksContextAddTempReference(self._handle, target.handle) + core.BNAnalyzeBasicBlocksContextAddTempReference(self._handle, target.handle) - def finalize(self) -> None: - """ + def finalize(self) -> None: + """ ``finalize`` finalizes the function's basic block analysis """ - if self._direct_code_references: - total = len(self._direct_code_references) - sources = (core.BNArchitectureAndAddress * total)() - targets = (ctypes.c_ulonglong * total)() - for i, (target, src) in enumerate(self._direct_code_references.items()): - sources[i].arch = src.arch.handle - sources[i].address = src.addr - targets[i] = target - - core.BNAnalyzeBasicBlocksContextSetDirectCodeReferences(self._handle, sources, targets, total) - - if self._direct_no_return_calls: - total = len(self._direct_no_return_calls) - direct_no_return_calls = (core.BNArchitectureAndAddress * total)() - for i, loc in enumerate(self._direct_no_return_calls): - direct_no_return_calls[i].arch = loc.arch.handle - direct_no_return_calls[i].address = loc.addr - core.BNAnalyzeBasicBlocksContextSetDirectNoReturnCalls(self._handle, direct_no_return_calls, total) - - self._halted_disassembly_addresses.add(function.ArchAndAddr(self._function.arch, 0)) - if self._halted_disassembly_addresses: - total = len(self._halted_disassembly_addresses) - halted_addresses = (core.BNArchitectureAndAddress * total)() - for i, loc in enumerate(self._halted_disassembly_addresses): - halted_addresses[i].arch = loc.arch.handle - halted_addresses[i].address = loc.addr - core.BNAnalyzeBasicBlocksContextSetHaltedDisassemblyAddresses(self._handle, halted_addresses, total) - - 
self._handle.maxSizeReached = ctypes.c_bool(self._max_size_reached) - if self._contextual_returns_dirty: - total = len(self._contextual_returns) - values = (ctypes.c_bool * total)() - returns = (core.BNArchitectureAndAddress * total)() - for i, (loc, value) in enumerate(self._contextual_returns.items()): - returns[i].arch = loc.arch.handle - returns[i].address = loc.addr - values[i] = value - core.BNAnalyzeBasicBlocksContextSetContextualFunctionReturns(self._handle, returns, values, total) - - core.BNAnalyzeBasicBlocksContextFinalize(self._handle) + if self._direct_code_references: + total = len(self._direct_code_references) + sources = (core.BNArchitectureAndAddress * total)() + targets = (ctypes.c_ulonglong * total)() + for i, (target, src) in enumerate(self._direct_code_references.items()): + sources[i].arch = src.arch.handle + sources[i].address = src.addr + targets[i] = target + + core.BNAnalyzeBasicBlocksContextSetDirectCodeReferences(self._handle, sources, targets, total) + + if self._direct_no_return_calls: + total = len(self._direct_no_return_calls) + direct_no_return_calls = (core.BNArchitectureAndAddress * total)() + for i, loc in enumerate(self._direct_no_return_calls): + direct_no_return_calls[i].arch = loc.arch.handle + direct_no_return_calls[i].address = loc.addr + core.BNAnalyzeBasicBlocksContextSetDirectNoReturnCalls(self._handle, direct_no_return_calls, total) + + self._halted_disassembly_addresses.add(function.ArchAndAddr(self._function.arch, 0)) + if self._halted_disassembly_addresses: + total = len(self._halted_disassembly_addresses) + halted_addresses = (core.BNArchitectureAndAddress * total)() + for i, loc in enumerate(self._halted_disassembly_addresses): + halted_addresses[i].arch = loc.arch.handle + halted_addresses[i].address = loc.addr + core.BNAnalyzeBasicBlocksContextSetHaltedDisassemblyAddresses(self._handle, halted_addresses, total) + + self._handle.maxSizeReached = ctypes.c_bool(self._max_size_reached) + if 
self._contextual_returns_dirty: + total = len(self._contextual_returns) + values = (ctypes.c_bool * total)() + returns = (core.BNArchitectureAndAddress * total)() + for i, (loc, value) in enumerate(self._contextual_returns.items()): + returns[i].arch = loc.arch.handle + returns[i].address = loc.addr + values[i] = value + core.BNAnalyzeBasicBlocksContextSetContextualFunctionReturns(self._handle, returns, values, total) + + self._handle.functionArchContext = self._function_arch_context_token + core.BNAnalyzeBasicBlocksContextFinalize(self._handle) @dataclass @@ -430,10 +443,12 @@ class FunctionLifterContext: _user_indirect_branches: Dict["function.ArchAndAddr", Set["function.ArchAndAddr"]] _auto_indirect_branches: Dict["function.ArchAndAddr", Set["function.ArchAndAddr"]] _inlined_calls: Set[int] + _function_arch_context_token: int @staticmethod - def from_core_struct(func: core.BNLowLevelILFunction, - bn_fl_context: core.BNFunctionLifterContext) -> "FunctionLifterContext": + def from_core_struct( + func: core.BNLowLevelILFunction, bn_fl_context: core.BNFunctionLifterContext + ) -> "FunctionLifterContext": """Create a FunctionLifterContext from a core.BNFunctionLifterContext structure.""" session_id = core.BNLoggerGetSessionId(bn_fl_context.logger) @@ -443,17 +458,13 @@ def from_core_struct(func: core.BNLowLevelILFunction, plat = platform.CorePlatform._from_cache(core.BNNewPlatformReference(bn_fl_context.platform)) blocks = [] for i in range(0, bn_fl_context.basicBlockCount): - blocks.append( - basicblock.BasicBlock( - core.BNNewBasicBlockReference(bn_fl_context.basicBlocks[i]) - ) - ) + blocks.append(basicblock.BasicBlock(core.BNNewBasicBlockReference(bn_fl_context.basicBlocks[i]))) contextual_returns = {} for i in range(0, bn_fl_context.contextualFunctionReturnCount): loc = function.ArchAndAddr( - CoreArchitecture._from_cache(bn_fl_context.contextualFunctionReturnLocations[i].arch), - bn_fl_context.contextualFunctionReturnLocations[i].address, + 
CoreArchitecture._from_cache(bn_fl_context.contextualFunctionReturnLocations[i].arch), + bn_fl_context.contextualFunctionReturnLocations[i].address, ) contextual_returns[loc] = bn_fl_context._contextualFunctionReturnValues[i] @@ -461,12 +472,12 @@ def from_core_struct(func: core.BNLowLevelILFunction, inline_remapping = {} for i in range(0, bn_fl_context.inlinedRemappingEntryCount): key = function.ArchAndAddr( - CoreArchitecture._from_cache(bn_fl_context.inlinedRemappingKeys[i].arch), - bn_fl_context.inlinedRemappingKeys[i].address, + CoreArchitecture._from_cache(bn_fl_context.inlinedRemappingKeys[i].arch), + bn_fl_context.inlinedRemappingKeys[i].address, ) dest = function.ArchAndAddr( - CoreArchitecture._from_cache(bn_fl_context.inlinedRemappingEntries[i].destination.arch), - bn_fl_context.inlinedRemappingEntries[i].destination.address, + CoreArchitecture._from_cache(bn_fl_context.inlinedRemappingEntries[i].destination.arch), + bn_fl_context.inlinedRemappingEntries[i].destination.address, ) inline_remapping[src] = dest @@ -474,8 +485,8 @@ def from_core_struct(func: core.BNLowLevelILFunction, auto_indirect_branches = {} for i in range(0, bn_fl_context.indirectBranchesCount): src = function.ArchAndAddr( - CoreArchitecture._from_cache(bn_fl_context.indirectBranches[i].sourceArch), - bn_fl_context.indirectBranches[i].sourceAddr, + CoreArchitecture._from_cache(bn_fl_context.indirectBranches[i].sourceArch), + bn_fl_context.indirectBranches[i].sourceAddr, ) dest = function.ArchAndAddr( @@ -492,37 +503,35 @@ def from_core_struct(func: core.BNLowLevelILFunction, user_indirect_branches[src] = set() user_indirect_branches[src].add(dest) - inlined_calls = set() for i in range(0, bn_fl_context.inlinedCallsCount): inlined_calls.add(bn_fl_context.inlinedCalls[i]) return FunctionLifterContext( - _handle=bn_fl_context, - _function=lowlevelil.LowLevelILFunction( - plat.arch, core.BNNewLowLevelILFunctionReference(func) - ), - _platform=plat, - _logger=logger, - _blocks=blocks, - 
_contextual_returns=contextual_returns, - _inline_remapping=inline_remapping, - _user_indirect_branches=user_indirect_branches, - _auto_indirect_branches=auto_indirect_branches, - _inlined_calls=inlined_calls, + _handle=bn_fl_context, + _function=lowlevelil.LowLevelILFunction(plat.arch, + core.BNNewLowLevelILFunctionReference(func)), _platform=plat, + _logger=logger, _blocks=blocks, _contextual_returns=contextual_returns, _inline_remapping=inline_remapping, + _user_indirect_branches=user_indirect_branches, _auto_indirect_branches=auto_indirect_branches, + _inlined_calls=inlined_calls, _function_arch_context_token=bn_fl_context.functionArchContext, ) def prepare_block_translation(self, function, arch, address): - """Prepare block for translation""" + """Prepare the basic block for translation""" core.BNPrepareBlockTranslation(function.handle, arch.handle, address) @property def blocks(self) -> List["basicblock.BasicBlock"]: - """Get the list of basic blocks in this context.""" + """Get the list of basic blocks in this context""" return self._blocks + @property + def function_arch_context(self) -> Any: + """Get the function architecture context""" + + return self._function.arch.function_arch_contexts.get(self._function_arch_context_token, None) @dataclass(frozen=True) class RegisterInfo: @@ -697,6 +706,7 @@ class Architecture(metaclass=_ArchitectureMetaClass): reg_stacks: Dict[RegisterStackName, RegisterStackInfo] = {} intrinsics = {} next_address = 0 + function_arch_contexts: Dict[int, Any] = {} def __init__(self): binaryninja._init_plugins() @@ -718,12 +728,16 @@ def __init__(self): ) self._cb.getInstructionInfo = self._cb.getInstructionInfo.__class__(self._get_instruction_info) self._cb.getInstructionText = self._cb.getInstructionText.__class__(self._get_instruction_text) + self._cb.getInstructionTextWithContext = self._cb.getInstructionTextWithContext.__class__( + self._get_instruction_text_with_context + ) self._cb.freeInstructionText = 
self._cb.freeInstructionText.__class__(self._free_instruction_text) self._cb.getInstructionLowLevelIL = self._cb.getInstructionLowLevelIL.__class__( self._get_instruction_low_level_il ) self._cb.analyzeBasicBlocks = self._cb.analyzeBasicBlocks.__class__(self._analyze_basic_blocks) self._cb.liftFunction = self._cb.liftFunction.__class__(self._lift_function) + self._cb.freeFunctionArchContext = self._cb.freeFunctionArchContext.__class__(self._free_function_arch_context) self._cb.getRegisterName = self._cb.getRegisterName.__class__(self._get_register_name) self._cb.getFlagName = self._cb.getFlagName.__class__(self._get_flag_name) self._cb.getFlagWriteTypeName = self._cb.getFlagWriteTypeName.__class__(self._get_flag_write_type_name) @@ -1165,6 +1179,26 @@ def _get_instruction_text(self, ctxt, data, addr, length, result, count): log_error_for_exception("Unhandled Python exception in Architecture._get_instruction_text") return False + def _get_instruction_text_with_context(self, ctxt, data, addr, length, context_token, result, count): + try: + buf = ctypes.create_string_buffer(length[0]) + ctypes.memmove(buf, data, length[0]) + context = self.function_arch_contexts.get(context_token, None) + info = self.get_instruction_text_with_context(buf.raw, addr, context) + if info is None: + return False + tokens = info[0] + length[0] = info[1] + count[0] = len(tokens) + token_buf = function.InstructionTextToken._get_core_struct(tokens) + result[0] = token_buf + ptr = ctypes.cast(token_buf, ctypes.c_void_p) + self._pending_token_lists[ptr.value] = (ptr.value, token_buf) + return True + except: + log_error_for_exception("Unhandled Python exception in Architecture._get_instruction_text_with_context") + return False + def _free_instruction_text(self, tokens, count): try: buf = ctypes.cast(tokens, ctypes.c_void_p) @@ -1206,6 +1240,12 @@ def _lift_function(self, ctx, func, ptr_bn_fl_context): log_error_for_exception("Unhandled Python exception in Architecture._lift_function") return 
False + def _free_function_arch_context(self, ctx, context_token): + try: + self.function_arch_contexts.pop(context_token, None) + except: + log_error_for_exception("Unhandled Python exception in Architecture._free_function_arch_context") + def _get_register_name(self, ctxt, reg): try: if reg in self._regs_by_index: @@ -1895,6 +1935,20 @@ def get_instruction_text(self, data: bytes, addr: int) -> Optional[Tuple[List['f """ raise NotImplementedError + def get_instruction_text_with_context(self, data: bytes, addr: int, context: Any) -> Optional[Tuple[List['function.InstructionTextToken'], int]]: + """ + ``get_instruction_text_with_context`` returns a tuple containing a list of decoded InstructionTextToken objects and the bytes used at the given virtual + address ``addr`` with data ``data``. + + .. note:: Architecture subclasses should implement this method if they require context from analyze_basic_blocks for instruction decoding. + + :param bytes data: a maximum of max_instruction_length bytes from the binary at virtual address ``addr`` + :param int addr: virtual address of bytes in ``data`` + :param Any context: function architecture context + :return: a tuple containing the InstructionTextToken list and length of bytes decoded + """ + return self.get_instruction_text(data, addr) + def get_instruction_low_level_il_instruction( self, bv: 'binaryview.BinaryView', addr: int ) -> 'lowlevelil.LowLevelILInstruction': @@ -3284,6 +3338,8 @@ def __init__(self, base_arch: 'Architecture'): self._cb.getInstructionInfo = self._cb.getInstructionInfo.__class__() if self.get_instruction_text.__code__ == CoreArchitecture.get_instruction_text.__code__: self._cb.getInstructionText = self._cb.getInstructionText.__class__() + if self.get_instruction_text_with_context.__code__ == CoreArchitecture.get_instruction_text_with_context.__code__: + self._cb.getInstructionTextWithContext = self._cb.getInstructionTextWithContext.__class__() if self.__class__.stack_pointer is None: 
self._cb.getStackPointerRegister = self._cb.getStackPointerRegister.__class__() if self.__class__.link_reg is None: diff --git a/rust/src/architecture.rs b/rust/src/architecture.rs index 6b5d44e54..b0064f681 100644 --- a/rust/src/architecture.rs +++ b/rust/src/architecture.rs @@ -42,6 +42,8 @@ use std::{ mem::MaybeUninit, }; +use std::ptr::NonNull; + use crate::function_recognizer::FunctionRecognizer; use crate::relocation::{CustomRelocationHandlerHandle, RelocationHandler}; @@ -192,6 +194,30 @@ pub trait Architecture: 'static + Sized + AsRef { addr: u64, ) -> Option<(usize, Vec)>; + /// Disassembles a raw byte sequence into a human-readable list of text tokens. + /// + /// This function is responsible for the visual representation of assembly instructions. + /// It does *not* define semantics (use [`Architecture::instruction_llil`] for that); + /// it simply tells the UI how to print the instruction. This variant includes contextual data, which + /// can be produced by analyze_basic_blocks + /// + /// # Returns + /// + /// An `Option` containing a tuple: + /// + /// * `usize`: The size of the decoded instruction in bytes. Is used to advance to the next instruction. + /// * `Vec`: A list of text tokens representing the instruction. + /// + /// Returns `None` if the bytes do not form a valid instruction. + fn instruction_text_with_context( + &self, + data: &[u8], + addr: u64, + _context: Option>, + ) -> Option<(usize, Vec)> { + self.instruction_text(data, addr) + } + // TODO: Why do we need to return a boolean here? Does `None` not represent the same thing? /// Appends arbitrary low-level il instructions to `il`. 
/// @@ -226,6 +252,12 @@ pub trait Architecture: 'static + Sized + AsRef { unsafe { BNArchitectureDefaultLiftFunction(function.handle, context.handle) } } + /// Free the function architecture context + /// + /// NOTE: Only implement this method in architecture plugins that allocate a context in + /// analyze_basic_blocks + fn free_function_arch_context(&self, _context: Option>) {} + /// Fallback flag value calculation path. This method is invoked when the core is unable to /// recover the flag using semantics and resorts to emitting instructions that explicitly set each /// observed flag to the value of an expression returned by this function. @@ -705,6 +737,38 @@ impl Architecture for CoreArchitecture { } } + fn instruction_text_with_context( + &self, + data: &[u8], + addr: u64, + context: Option>, + ) -> Option<(usize, Vec)> { + let mut consumed = data.len(); + let mut count: usize = 0; + let mut result: *mut BNInstructionTextToken = std::ptr::null_mut(); + let ctx_ptr: *mut c_void = context.map_or(std::ptr::null_mut(), |p| p.as_ptr()); + unsafe { + if BNGetInstructionTextWithContext( + self.handle, + data.as_ptr(), + addr, + &mut consumed, + ctx_ptr, + &mut result, + &mut count, + ) { + let instr_text_tokens = std::slice::from_raw_parts(result, count) + .iter() + .map(InstructionTextToken::from_raw) + .collect(); + BNFreeInstructionText(result, count); + Some((consumed, instr_text_tokens)) + } else { + None + } + } + } + fn instruction_llil( &self, data: &[u8], @@ -753,6 +817,8 @@ impl Architecture for CoreArchitecture { unsafe { BNArchitectureLiftFunction(self.handle, function.handle, context.handle) } } + fn free_function_arch_context(&self, _context: Option>) {} + fn flag_write_llil<'a>( &self, _flag: Self::Flag, @@ -1419,6 +1485,43 @@ where true } + pub unsafe extern "C" fn cb_get_instruction_text_with_context( + ctxt: *mut c_void, + data: *const u8, + addr: u64, + len: *mut usize, + context: *mut c_void, + result: *mut *mut BNInstructionTextToken, + count: 
*mut usize, + ) -> bool + where + A: 'static + Architecture> + Send + Sync, + { + let custom_arch = unsafe { &*(ctxt as *mut A) }; + let data = unsafe { std::slice::from_raw_parts(data, *len) }; + let result = unsafe { &mut *result }; + let context = NonNull::new(context); + + let Some((res_size, res_tokens)) = + custom_arch.instruction_text_with_context(data, addr, context) + else { + return false; + }; + + let res_tokens: Box<[BNInstructionTextToken]> = res_tokens + .into_iter() + .map(InstructionTextToken::into_raw) + .collect(); + unsafe { + // NOTE: Freed with `cb_free_instruction_text` + let res_tokens = Box::leak(res_tokens); + *result = res_tokens.as_mut_ptr(); + *count = res_tokens.len(); + *len = res_size; + } + true + } + extern "C" fn cb_free_instruction_text(tokens: *mut BNInstructionTextToken, count: usize) { unsafe { let raw_tokens = std::slice::from_raw_parts_mut(tokens, count); @@ -1485,6 +1588,15 @@ where custom_arch.lift_function(function, &mut context) } + extern "C" fn cb_free_function_arch_context(ctxt: *mut c_void, context: *mut c_void) + where + A: 'static + Architecture> + Send + Sync, + { + let custom_arch = unsafe { &*(ctxt as *mut A) }; + let context = NonNull::new(context); + custom_arch.free_function_arch_context(context); + } + extern "C" fn cb_reg_name(ctxt: *mut c_void, reg: u32) -> *mut c_char where A: 'static + Architecture> + Send + Sync, @@ -2401,10 +2513,12 @@ where getAssociatedArchitectureByAddress: Some(cb_associated_arch_by_addr::), getInstructionInfo: Some(cb_instruction_info::), getInstructionText: Some(cb_get_instruction_text::), + getInstructionTextWithContext: Some(cb_get_instruction_text_with_context::), freeInstructionText: Some(cb_free_instruction_text), getInstructionLowLevelIL: Some(cb_instruction_llil::), analyzeBasicBlocks: Some(cb_analyze_basic_blocks::), liftFunction: Some(cb_lift_function::), + freeFunctionArchContext: Some(cb_free_function_arch_context::), getRegisterName: Some(cb_reg_name::), getFlagName: 
Some(cb_flag_name::), From 51fa178b81e265d50981878f23c95c224c5d125a Mon Sep 17 00:00:00 2001 From: Brandon Miller Date: Fri, 6 Feb 2026 10:28:08 -0500 Subject: [PATCH 2/3] Allow arch hooks to set BBAC function arch context Allow architecture hooks to set the BasicBlockAnalysisContext's functionArchContext and make SetFunctionArchContext return false if the context has already been set in the hook chain. This ensures that arch hooks can't call into the base class and then override the function arch context if the base class sets it. It also ensures the base class can bail gracefully if the arch hook tries to set the FAC before calling into the base class, if needed. --- architecture.cpp | 12 ++++++++++-- binaryninjaapi.h | 2 +- binaryninjacore.h | 1 - python/architecture.py | 16 ++++++++-------- rust/src/architecture/basic_block.rs | 2 -- 5 files changed, 19 insertions(+), 14 deletions(-) diff --git a/architecture.cpp b/architecture.cpp index c65f6b2d8..a6e2fba71 100644 --- a/architecture.cpp +++ b/architecture.cpp @@ -307,6 +307,16 @@ std::map& BasicBlockAnalysisContext::GetInlinedUnresol } +bool BasicBlockAnalysisContext::SetFunctionArchContext(void* context) +{ + if (m_context->functionArchContext) + return false; + + m_context->functionArchContext = context; + return true; +} + + void BasicBlockAnalysisContext::AddTempOutgoingReference(Function* targetFunc) { BNAnalyzeBasicBlocksContextAddTempReference(m_context, targetFunc->m_object); @@ -461,8 +471,6 @@ void BasicBlockAnalysisContext::Finalize() delete[] values; } } - - BNAnalyzeBasicBlocksContextFinalize(m_context); } diff --git a/binaryninjaapi.h b/binaryninjaapi.h index ca73e3915..87277d906 100644 --- a/binaryninjaapi.h +++ b/binaryninjaapi.h @@ -9401,7 +9401,7 @@ namespace BinaryNinja { std::map& GetInlinedUnresolvedIndirectBranches(); void* GetFunctionArchContext() { return m_context->functionArchContext; } - void SetFunctionArchContext(void* context) { m_context->functionArchContext = context; } + bool 
SetFunctionArchContext(void* context); void AddTempOutgoingReference(Function* targetFunc); diff --git a/binaryninjacore.h b/binaryninjacore.h index 363d825b2..e4dbb056b 100644 --- a/binaryninjacore.h +++ b/binaryninjacore.h @@ -5601,7 +5601,6 @@ extern "C" // BNAnalyzeBasicBlockContext operations BINARYNINJACOREAPI BNBasicBlock* BNAnalyzeBasicBlocksContextCreateBasicBlock(BNBasicBlockAnalysisContext* abb, BNArchitecture* arch, uint64_t addr); BINARYNINJACOREAPI void BNAnalyzeBasicBlocksContextAddBasicBlockToFunction(BNBasicBlockAnalysisContext* abb, BNBasicBlock* block); - BINARYNINJACOREAPI void BNAnalyzeBasicBlocksContextFinalize(BNBasicBlockAnalysisContext* abb); BINARYNINJACOREAPI void BNAnalyzeBasicBlocksContextAddTempReference(BNBasicBlockAnalysisContext* abb, BNFunction* target); diff --git a/python/architecture.py b/python/architecture.py index 8b9a5491c..519cbb2e2 100644 --- a/python/architecture.py +++ b/python/architecture.py @@ -96,7 +96,6 @@ class BasicBlockAnalysisContext: _direct_code_references: Dict[int, "function.ArchAndAddr"] _direct_no_return_calls: Set["function.ArchAndAddr"] _halted_disassembly_addresses: Set["function.ArchAndAddr"] - _function_arch_context_token: int @staticmethod def from_core_struct(bn_bb_context: core.BNBasicBlockAnalysisContext) -> "BasicBlockAnalysisContext": @@ -167,7 +166,6 @@ def from_core_struct(bn_bb_context: core.BNBasicBlockAnalysisContext) -> "BasicB _contextual_returns=contextual_returns, _contextual_returns_dirty=False, _direct_code_references=direct_code_references, _direct_no_return_calls=direct_no_return_calls, _halted_disassembly_addresses=halted_disassembly_addresses, - _function_arch_context_token=bn_bb_context.functionArchContext, ) @property @@ -328,15 +326,20 @@ def add_halted_disassembly_address(self, loc: "function.ArchAndAddr") -> None: def function_arch_context(self) -> Any: """Get the function architecture context""" - return 
self._function.arch.function_arch_contexts.get(self._function_arch_context_token, None) + tok = int(self._handle.functionArchContext or 0) + if tok == 0: + return None + return self._function.arch.function_arch_contexts.get(tok, None) @function_arch_context.setter def function_arch_context(self, value: Any) -> None: """Set the function architecture context""" - token = id(self._function) + if self._handle.functionArchContext: + raise ValueError("Function architecture context has already been set") + token = self._function.start self._function.arch.function_arch_contexts[token] = value - self._function_arch_context_token = token + self._handle.functionArchContext = ctypes.c_void_p(token) def create_basic_block(self, arch: "Architecture", start: int) -> Optional["basicblock.BasicBlock"]: """ @@ -422,9 +425,6 @@ def finalize(self) -> None: values[i] = value core.BNAnalyzeBasicBlocksContextSetContextualFunctionReturns(self._handle, returns, values, total) - self._handle.functionArchContext = self._function_arch_context_token - core.BNAnalyzeBasicBlocksContextFinalize(self._handle) - @dataclass class FunctionLifterContext: diff --git a/rust/src/architecture/basic_block.rs b/rust/src/architecture/basic_block.rs index 428a97a16..21dddb4c1 100644 --- a/rust/src/architecture/basic_block.rs +++ b/rust/src/architecture/basic_block.rs @@ -328,8 +328,6 @@ impl BasicBlockAnalysisContext { if self.contextual_returns_dirty { self.update_contextual_returns(); } - - unsafe { BNAnalyzeBasicBlocksContextFinalize(self.handle) }; } } From 48538a2f7464dbbc58554af6bac55fdc3af137f2 Mon Sep 17 00:00:00 2001 From: Brandon Miller Date: Fri, 6 Feb 2026 12:58:43 -0500 Subject: [PATCH 3/3] Add templated ArchitectureWithFunctionContext class Allows consumer C++ architecture plugins to provide any type for the function context without having to cast to/from void * --- architecture.cpp | 11 +---- binaryninjaapi.h | 105 +++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 89 
insertions(+), 27 deletions(-) diff --git a/architecture.cpp b/architecture.cpp index a6e2fba71..acc473187 100644 --- a/architecture.cpp +++ b/architecture.cpp @@ -307,12 +307,11 @@ std::map& BasicBlockAnalysisContext::GetInlinedUnresol } -bool BasicBlockAnalysisContext::SetFunctionArchContext(void* context) +bool BasicBlockAnalysisContext::SetFunctionArchContextRaw(void* p) { if (m_context->functionArchContext) return false; - - m_context->functionArchContext = context; + m_context->functionArchContext = p; return true; } @@ -591,12 +590,6 @@ std::set& FunctionLifterContext::GetInlinedCalls() } -void *FunctionLifterContext::GetFunctionArchContext() -{ - return m_functionArchContext; -} - - void FunctionLifterContext::SetContainsInlinedFunctions(bool value) { *m_containsInlinedFunctions = value; diff --git a/binaryninjaapi.h b/binaryninjaapi.h index 87277d906..b2019598a 100644 --- a/binaryninjaapi.h +++ b/binaryninjaapi.h @@ -9400,8 +9400,20 @@ namespace BinaryNinja { std::set& GetHaltedDisassemblyAddresses(); std::map& GetInlinedUnresolvedIndirectBranches(); - void* GetFunctionArchContext() { return m_context->functionArchContext; } - bool SetFunctionArchContext(void* context); + bool SetFunctionArchContextRaw(void* p); + void* GetFunctionArchContextRaw() const { return m_context->functionArchContext; } + + template + bool SetFunctionArchContext(const ArchT* arch, typename ArchT::FunctionArchContext* context) + { + return arch->SetFunctionArchContext(*this, context); + } + + template + typename ArchT::FunctionArchContext* GetFunctionArchContext(const ArchT* arch) + { + return arch->GetFunctionArchContext(*this); + } void AddTempOutgoingReference(Function* targetFunc); @@ -9441,7 +9453,12 @@ namespace BinaryNinja { std::map>& GetAutoIndirectBranches(); std::set& GetInlinedCalls(); void SetContainsInlinedFunctions(bool value); - void* GetFunctionArchContext(); + void* GetFunctionArchContextRaw() const { return m_functionArchContext; } + template + typename 
ArchT::FunctionArchContext* GetFunctionArchContext(const ArchT* arch) + { + return arch->GetFunctionArchContext(*this); + } void CheckForInlinedCall(BasicBlock* block, size_t instrCountBefore, size_t instrCountAfter, uint64_t prevAddr, uint64_t addr, const uint8_t* opcode, size_t len, @@ -9660,17 +9677,7 @@ namespace BinaryNinja { virtual bool GetInstructionText( const uint8_t* data, uint64_t addr, size_t& len, std::vector& result) = 0; - /*! Retrieves a list of InstructionTextTokens while supplying contextual information - - \note Architecture subclasses can implement this method to provide contextual information from AnalyzeBasicBlocks - - \param[in] data pointer to the instruction data to retrieve text for - \param[in] addr address of the instruction data to retrieve text for - \param[out] len will be written to with the length of the instruction data which was translated - \param[in] context context to use when retrieving instruction text - \param[out] result - \return Whether instruction info was successfully retrieved. - */ + /* For use in architecture plugins that inherit from ArchitectureWithFunctionContext */ virtual bool GetInstructionTextWithContext(const uint8_t* data, uint64_t addr, size_t& len, void* context, std::vector& result); @@ -9700,10 +9707,7 @@ namespace BinaryNinja { */ virtual bool LiftFunction(LowLevelILFunction* function, FunctionLifterContext& context); - /*! Free the function architecture context - - \param context Function architecture context - */ + /* For use in architecture plugins that inherit from ArchitectureWithFunctionContext */ virtual void FreeFunctionArchContext(void* context); /*! Gets a register name from a register index. @@ -10081,6 +10085,71 @@ namespace BinaryNinja { void AddArchitectureRedirection(Architecture* from, Architecture* to); }; + /*! 
The ArchitectureWithFunctionContext class is to be inherited by architecture plugins that need to maintain a + * function context that is set during AnalyzeBasicBlocks and accessed in LiftFunction and/or + * GetInstructionTextWithContext. + + \ingroup architectures + */ + template + class ArchitectureWithFunctionContext : public Architecture + { + public: + using Architecture::Architecture; + using FunctionArchContext = FnCtxT; + + /*! Set the function architecture context + + \param bbac Basic block analysis context + \param ctx Function architecture context + \return True if the context was set successfully + */ + bool SetFunctionArchContext(BasicBlockAnalysisContext& bbac, FnCtxT* ctx) const + { + return bbac.SetFunctionArchContextRaw(static_cast(ctx)); + } + + /*! Get the function architecture context from the basic block analysis context + + \param bbac Basic block analysis context + \return Function architecture context + */ + FnCtxT* GetFunctionArchContext(const BasicBlockAnalysisContext& bbac) const + { + return static_cast(bbac.GetFunctionArchContextRaw()); + } + + /*! Free the function architecture context + \param context Function architecture context + */ + virtual void FreeFunctionArchContext(FnCtxT* context) {} + void FreeFunctionArchContext(void* context) override final + { + FreeFunctionArchContext(static_cast(context)); + } + + /*! Get instruction text with function context + + \param data Pointer to the instruction data to retrieve text for + \param addr Address of the instruction data to retrieve text for + \param len Will be written to with the length of the instruction data which was translated + \param context Context to use when retrieving instruction text + \param result Output vector of instruction text tokens + \return Whether instruction text was successfully retrieved. 
+ */ + virtual bool GetInstructionTextWithContext( + const uint8_t* data, uint64_t addr, size_t& len, FnCtxT* context, std::vector& result) + { + return Architecture::GetInstructionTextWithContext(data, addr, len, static_cast(context), result); + } + + bool GetInstructionTextWithContext(const uint8_t* data, uint64_t addr, size_t& len, void* context, + std::vector& result) override final + { + return GetInstructionTextWithContext(data, addr, len, static_cast(context), result); + } + }; + /*! \ingroup architectures