Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ ql/python/ql/src/experimental/Security/CWE-079/EmailXss.ql
ql/python/ql/src/experimental/Security/CWE-091/XsltInjection.ql
ql/python/ql/src/experimental/Security/CWE-094/Js2Py.ql
ql/python/ql/src/experimental/Security/CWE-1236/CsvInjection.ql
ql/python/ql/src/experimental/Security/CWE-1427/PromptInjection.ql
ql/python/ql/src/experimental/Security/CWE-176/UnicodeBypassValidation.ql
ql/python/ql/src/experimental/Security/CWE-208/TimingAttackAgainstHash/PossibleTimingAttackAgainstHash.ql
ql/python/ql/src/experimental/Security/CWE-208/TimingAttackAgainstHash/TimingAttackAgainstHash.ql
Expand Down
5 changes: 5 additions & 0 deletions python/ql/lib/change-notes/2026-01-02-prompt-injection.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
category: minorAnalysis
---
* Added experimental query `py/prompt-injection` to detect potential prompt injection vulnerabilities in code using LLMs.
* Added taint flow model and type model for `agents` and `openai` modules.
6 changes: 6 additions & 0 deletions python/ql/lib/semmle/python/frameworks/agent.model.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
extensions:
- addsTo:
pack: codeql/python-all
extensible: sinkModel
data:
- ['agents', 'Member[Agent].Argument[instructions:]', 'prompt-injection']
12 changes: 12 additions & 0 deletions python/ql/lib/semmle/python/frameworks/openai.model.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
extensions:
- addsTo:
pack: codeql/python-all
extensible: sinkModel
data:
- ['OpenAI', 'Member[beta].Member[assistants].Member[create].Argument[instructions:]', 'prompt-injection']

- addsTo:
pack: codeql/python-all
extensible: typeModel
data:
- ['OpenAI', 'openai', 'Member[OpenAI,AsyncOpenAI,AzureOpenAI].ReturnValue']
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>

<overview>
<p>Prompts can be constructed to bypass the original purposes of an agent and lead to sensitive data leak or
operations that were not intended.</p>
</overview>

<recommendation>
<p>Sanitize user input and also avoid using user input in developer or system level prompts.</p>
</recommendation>

<example>
<p>In the following examples, the cases marked GOOD show secure prompt construction; whereas in the case marked BAD they may be susceptible to prompt injection.</p>
<sample src="examples/example.py" />
</example>

<references>
<li>OpenAI: <a href="https://openai.github.io/openai-guardrails-python">Guardrails</a>.</li>
</references>

</qhelp>
20 changes: 20 additions & 0 deletions python/ql/src/experimental/Security/CWE-1427/PromptInjection.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/**
* @name Prompt injection
* @kind path-problem
* @problem.severity error
* @security-severity 5.0
* @precision high
* @id py/prompt-injection
* @tags security
* experimental
* external/cwe/cwe-1427
*/

import python
import experimental.semmle.python.security.dataflow.PromptInjectionQuery
import PromptInjectionFlow::PathGraph

from PromptInjectionFlow::PathNode source, PromptInjectionFlow::PathNode sink
where PromptInjectionFlow::flowPath(source, sink)
select sink.getNode(), source, sink, "This prompt construction depends on a $@.", source.getNode(),
"user-provided value"
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from flask import Flask, request
from agents import Agent
from guardrails import GuardrailAgent

@app.route("/parameter-route")
def get_input():
input = request.args.get("input")

goodAgent = GuardrailAgent( # GOOD: Agent created with guardrails automatically configured.
config=Path("guardrails_config.json"),
name="Assistant",
instructions="This prompt is customized for " + input)

badAgent = Agent(
name="Assistant",
instructions="This prompt is customized for " + input # BAD: user input in agent instruction.
)
25 changes: 25 additions & 0 deletions python/ql/src/experimental/semmle/python/Concepts.qll
Original file line number Diff line number Diff line change
Expand Up @@ -483,3 +483,28 @@ class EmailSender extends DataFlow::Node instanceof EmailSender::Range {
*/
DataFlow::Node getABody() { result in [super.getPlainTextBody(), super.getHtmlBody()] }
}

/**
* A data-flow node that prompts an AI model.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `AIPrompt::Range` instead.
*/
class AIPrompt extends DataFlow::Node instanceof AIPrompt::Range {
/** Gets an input that is used as AI prompt. */
DataFlow::Node getAPrompt() { result = super.getAPrompt() }
}

/** Provides a class for modeling new AI prompting mechanisms. */
module AIPrompt {
/**
* A data-flow node that prompts an AI model.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `AIPrompt` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets an input that is used as AI prompt. */
abstract DataFlow::Node getAPrompt();
}
}
1 change: 1 addition & 0 deletions python/ql/src/experimental/semmle/python/Frameworks.qll
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ private import experimental.semmle.python.frameworks.Scrapli
private import experimental.semmle.python.frameworks.Twisted
private import experimental.semmle.python.frameworks.JWT
private import experimental.semmle.python.frameworks.Csv
private import experimental.semmle.python.frameworks.OpenAI
private import experimental.semmle.python.libraries.PyJWT
private import experimental.semmle.python.libraries.Python_JWT
private import experimental.semmle.python.libraries.Authlib
Expand Down
88 changes: 88 additions & 0 deletions python/ql/src/experimental/semmle/python/frameworks/OpenAI.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/**
* Provides classes modeling security-relevant aspects of the `openAI` Agents SDK package.
* See https://github.com/openai/openai-agents-python.
* As well as the regular openai python interface.
* See https://github.com/openai/openai-python.
*/

private import python
private import semmle.python.ApiGraphs

/**
* Provides models for agents SDK (instances of the `agents.Runner` class etc).
*
* See https://github.com/openai/openai-agents-python.
*/
module AgentSDK {

Check warning

Code scanning / CodeQL

Acronyms should be PascalCase/camelCase Warning

Acronyms in AgentSDK should be PascalCase/camelCase.
/** Gets a reference to the `agents.Runner` class. */
API::Node classRef() { result = API::moduleImport("agents").getMember("Runner") }

/** Gets a reference to the `run` members. */
API::Node runMembers() { result = classRef().getMember(["run", "run_sync", "run_streamed"]) }

/** Gets a reference to a potential property of `agents.Runner` called input which can refer to a system prompt depending on the role specified. */
API::Node getContentNode() {
result = runMembers().getKeywordParameter("input").getASubscript().getSubscript("content")
or
result = runMembers().getParameter(_).getASubscript().getSubscript("content")
}
}

/**
* Provides models for Agent (instances of the `openai.OpenAI` class).
*
* See https://github.com/openai/openai-python.
*/
module OpenAI {
/** Gets a reference to the `openai.OpenAI` class. */
API::Node classRef() {
result =
API::moduleImport("openai").getMember(["OpenAI", "AsyncOpenAI", "AzureOpenAI"]).getReturn()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like you already have these in MaD, can you not just reuse those?

Copy link
Contributor Author

@mbaluda mbaluda Jan 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

classRef is used in getContentNode so that we can use some logic to only mark as sink the innermost element in the object structure

}

/** Gets a reference to a potential property of `openai.OpenAI` called instructions which refers to the system prompt. */
API::Node getContentNode() {
exists(API::Node content |
content =
classRef()
.getMember("responses")
.getMember("create")
.getKeywordParameter(["input", "instructions"])
or
content =
classRef()
.getMember("responses")
.getMember("create")
.getKeywordParameter(["input", "instructions"])
.getASubscript()
.getSubscript("content")
or
content =
classRef()
.getMember("realtime")
.getMember("connect")
.getReturn()
.getMember("conversation")
.getMember("item")
.getMember("create")
.getKeywordParameter("item")
.getSubscript("content")
or
content =
classRef()
.getMember("chat")
.getMember("completions")
.getMember("create")
.getKeywordParameter("messages")
.getASubscript()
.getSubscript("content")
|
// content
if not exists(content.getASubscript())
then result = content
else
// content.text
result = content.getASubscript().getSubscript("text")
)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "prompt injection"
* vulnerabilities, as well as extension points for adding your own.
*/

import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import experimental.semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.frameworks.data.ModelsAsData
private import experimental.semmle.python.frameworks.OpenAI

/**
* Provides default sources, sinks and sanitizers for detecting
* "prompt injection"
* vulnerabilities, as well as extension points for adding your own.
*/
module PromptInjection {
/**
* A data flow source for "prompt injection" vulnerabilities.
*/
abstract class Source extends DataFlow::Node { }

/**
* A data flow sink for "prompt injection" vulnerabilities.
*/
abstract class Sink extends DataFlow::Node { }

/**
* A sanitizer for "prompt injection" vulnerabilities.
*/
abstract class Sanitizer extends DataFlow::Node { }

/**
* An active threat-model source, considered as a flow source.
*/
private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { }

/**
* A prompt to an AI model, considered as a flow sink.
*/
class AIPromptAsSink extends Sink {
AIPromptAsSink() { this = any(AIPrompt p).getAPrompt() }
}

private class SinkFromModel extends Sink {
SinkFromModel() { this = ModelOutput::getASinkNode("prompt-injection").asSink() }
}

private class PromptContentSink extends Sink {
PromptContentSink() {
this = OpenAI::getContentNode().asSink()
or
this = AgentSDK::getContentNode().asSink()
}
}

/**
* A comparison with a constant, considered as a sanitizer-guard.
*/
class ConstCompareAsSanitizerGuard extends Sanitizer, ConstCompareBarrier { }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/**
* Provides a taint-tracking configuration for detecting "prompt injection" vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `PromptInjection::Configuration` is needed, otherwise
* `PromptInjectionCustomizations` should be imported instead.
*/

private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import PromptInjectionCustomizations::PromptInjection

private module PromptInjectionConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) { node instanceof Source }

predicate isSink(DataFlow::Node node) { node instanceof Sink }

predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }

predicate observeDiffInformedIncrementalMode() { any() }
}

/** Global taint-tracking for detecting "prompt injection" vulnerabilities. */
module PromptInjectionFlow = TaintTracking::Global<PromptInjectionConfig>;
Loading
Loading