Skip to content

Commit a864ffa

Browse files
Use local resources in CodeQL for Python MCP
1 parent 4af4a65 commit a864ffa

File tree

6 files changed

+29
-102
lines changed

6 files changed

+29
-102
lines changed

src/seclab_taskflows/mcp_servers/codeql_python/codeql_sqlite_models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ class Source(Base):
1515
id: Mapped[int] = mapped_column(primary_key=True)
1616
repo: Mapped[str]
1717
source_location: Mapped[str]
18+
line: Mapped[int]
1819
type: Mapped[str]
1920
notes: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
2021

src/seclab_taskflows/mcp_servers/codeql_python/mcp_server.py

Lines changed: 12 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def source_to_dict(result):
4545
"source_id": result.id,
4646
"repo": result.repo,
4747
"source_location": result.source_location,
48+
"line": result.line,
4849
"type": result.type,
4950
"notes": result.notes
5051
}
@@ -84,17 +85,17 @@ def __init__(self, memcache_state_dir: str):
8485
Base.metadata.create_all(self.engine, tables=[Source.__table__])
8586

8687

87-
def store_new_source(self, repo, source_location, type, notes, update = False):
88+
def store_new_source(self, repo, source_location, line, type, notes, update = False):
8889
with Session(self.engine) as session:
89-
existing = session.query(Source).filter_by(repo = repo, source_location = source_location).first()
90+
existing = session.query(Source).filter_by(repo = repo, source_location = source_location, line = line).first()
9091
if existing:
9192
existing.notes = (existing.notes or "") + notes
9293
session.commit()
93-
return f"Updated notes for source at {source_location} in {repo}."
94+
return f"Updated notes for source at {source_location}, line {line} in {repo}."
9495
else:
9596
if update:
96-
return f"No source exists at repo {repo}, location {source_location}"
97-
new_source = Source(repo = repo, source_location = source_location, type = type, notes = notes)
97+
return f"No source exists at repo {repo}, location {source_location}, line {line} to update."
98+
new_source = Source(repo = repo, source_location = source_location, line = line, type = type, notes = notes)
9899
session.add(new_source)
99100
session.commit()
100101
return f"Added new source for {source_location} in {repo}."
@@ -174,7 +175,8 @@ def remote_sources(owner: str, repo: str,
174175
repo=repo,
175176
source_location=result.get('location', ''),
176177
type=result.get('source', ''),
177-
notes='', #result.get('description', ''),
178+
line=int(result.get('line', '0')),
179+
notes=None, #result.get('description', ''),
178180
update=False
179181
)
180182
stored_count += 1
@@ -191,18 +193,15 @@ def fetch_sources(owner: str, repo: str):
191193

192194
@mcp.tool()
193195
def add_source_notes(owner: str, repo: str,
194-
database_path: str = Field(description="The CodeQL database path."),
195-
source_location: str = Field(description="The path to the file and column info that contains the source"),
196+
# database_path: str = Field(description="The CodeQL database path."),
197+
source_location: str = Field(description="The path to the file"),
198+
line: int = Field(description="The line number of the source"),
196199
notes: str = Field(description="The notes to append to this source", default="")):
197200
"""
198201
Add new notes to an existing source. The notes will be appended to any existing notes.
199202
"""
200203
repo = f"{owner}/{repo}"
201-
try:
202-
database_path = _resolve_db_path(database_path)
203-
except RuntimeError:
204-
return f"The database path for {database_path} could not be resolved"
205-
return backend.store_new_source(repo, source_location, "", notes, update=True)
204+
return backend.store_new_source(repo = repo, source_location = source_location, line = line, type = "", notes = notes, update=True)
206205

207206
@mcp.tool()
208207
def clear_codeql_repo(owner: str, repo: str):
@@ -216,45 +215,5 @@ def clear_codeql_repo(owner: str, repo: str):
216215
session.commit()
217216
return f"Cleared {deleted_sources} sources from repo {repo}."
218217

219-
@mcp.tool()
220-
def get_file_contents(
221-
file_uri: str = Field(description="The file URI to get contents for. The URI scheme is defined as `file://path` and `file://path:region`. Examples of file URI: `file:///path/to/file:1:2:3:4`, `file:///path/to/file`. File URIs optionally contain a region definition that looks like `start_line:start_column:end_line:end_column` which will limit the contents returned to the specified region, for example `file:///path/to/file:1:2:3:4` indicates a file region of `1:2:3:4` which would return the content of the file starting at line 1, column 1 and ending at line 3 column 4. Line and column indices are 1-based, meaning line and column values start at 1. If the region is omitted the full contents of the file will be returned, for example `file:///path/to/file` returns the full contents of `/path/to/file`."),
222-
database_path: str = Field(description="The path to the CodeQL database.")):
223-
"""Get the contents of a file URI from a CodeQL database path."""
224-
225-
database_path = _resolve_db_path(database_path)
226-
try:
227-
# fix up any incorrectly formatted relative path uri
228-
if not file_uri.startswith('file:///'):
229-
if file_uri.startswith('file://'):
230-
file_uri = file_uri[len('file://'):]
231-
file_uri = 'file:///' + file_uri.lstrip('/')
232-
results = _get_file_contents(database_path, file_uri)
233-
except Exception as e:
234-
results = f"Error: could not retrieve {file_uri}: {e}"
235-
return results
236-
237-
@mcp.tool()
238-
def list_source_files(database_path: str = Field(description="The path to the CodeQL database."),
239-
regex_filter: str = Field(description="Optional Regex filter.", default = r'[\s\S]+')):
240-
"""List the available source files in a CodeQL database using their file:// URI"""
241-
database_path = _resolve_db_path(database_path)
242-
results = list_src_files(database_path, as_uri=True)
243-
return json.dumps([{'uri': item} for item in results if re.search(regex_filter, item)], indent=2)
244-
245-
@mcp.tool()
246-
def search_in_source_code(database_path: str = Field(description="The path to the CodeQL database."),
247-
search_term: str = Field(description="The term to search in the source code")):
248-
"""
249-
Search for a string in the source code. Returns the line number and file.
250-
"""
251-
resolved_database_path = _resolve_db_path(database_path)
252-
results = search_in_src_archive(resolved_database_path, search_term)
253-
out = []
254-
if isinstance(results, dict):
255-
for k,v in results.items():
256-
out.append({"database" : database_path, "path" : k, "lines" : v})
257-
return json.dumps(out, indent = 2)
258-
259218
if __name__ == "__main__":
260219
mcp.run(show_banner=False, transport="http", host="127.0.0.1", port=9998)

src/seclab_taskflows/mcp_servers/codeql_python/queries/mcp-python/remote_sources.ql

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,15 @@
77
import python
88
import semmle.python.dataflow.new.RemoteFlowSources
99

10-
string normalizeLocation(Location l) {
11-
result = "file://" + "/" + l.getFile().getRelativePath() + ":" + l.getStartLine().toString() + ":" + l.getStartColumn().toString()
12-
+ ":" + l.getEndLine().toString() + ":" + l.getEndColumn().toString()
13-
}
10+
// string normalizeLocation(Location l) {
11+
// result = l.getFile().getRelativePath() + ":" + l.getStartLine().toString() + ":" + l.getStartColumn().toString()
12+
// + ":" + l.getEndLine().toString() + ":" + l.getEndColumn().toString()
13+
// }
1414

1515
from RemoteFlowSource source
1616
select
17-
"Remote source {0} is defined at {1}", "source,location", source.getSourceType(), normalizeLocation(source.getLocation())
17+
"Remote source {0} is defined at {1} line {2}",
18+
"source,location,line",
19+
source.getSourceType(),
20+
source.getLocation().getFile().getRelativePath(),
21+
source.getLocation().getStartLine().toString()
File renamed without changes.

src/seclab_taskflows/taskflows/audit/remote_sources_local.yaml

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ model_config: seclab_taskflows.configs.model_config
99

1010
globals:
1111
repo:
12-
apache/allura
1312
# Taskflow to analyze the existing information
1413
taskflow:
1514
- task:
@@ -31,11 +30,9 @@ taskflow:
3130
- seclab_taskflow_agent.personalities.assistant
3231
user_prompt: |
3332
For the repo {{ GLOBALS_repo }} fetch the Python CodeQL database and find all remote flow sources using CodeQL.
34-
Store the value for CodeQL's 'relative_database_path' in 'codeql_relative_database_path' memory key.
3533
toolboxes:
3634
- seclab_taskflows.toolboxes.gh_code_scanning
3735
- seclab_taskflows.toolboxes.codeql_python
38-
- seclab_taskflow_agent.toolboxes.memcache
3936
- task:
4037
must_complete: true
4138
exclude_from_context: true
@@ -50,25 +47,21 @@ taskflow:
5047
model: code_analysis
5148
must_complete: false
5249
repeat_prompt: true
53-
async: true
54-
async_limit: 5
5550
max_steps: 100
5651
name: source analysis
5752
description: Identify actions that untrusted users are allowed to perform on the source.
5853
agents:
59-
- seclab_taskflows.personalities.auditer
54+
- seclab_taskflows.personalities.auditor
6055
user_prompt: |
61-
Retrieve the contents of the `codeql_relative_database_path` memory key, which represents the relative path to the CodeQL database for the repository {{ GLOBALS_repo }}.
62-
Using the CodeQL database located at that path, analyze the following source:
63-
The source is a {{ RESULT_type }} in {{ RESULT_repo }} in the location {{ RESULT_source_location }}.
64-
Analyze what the source endpoint is for and how it is used.
65-
Search for relevant code associated with each source.
56+
Fetch the zipball of the repository {{ GLOBALS_repo }} and use it to analyze the source.
57+
The source is a {{ RESULT_type }} in {{ RESULT_repo }} in the location {{ RESULT_source_location }} on line {{ RESULT_line }}.
58+
If the source is in a folder relating to tests or demo code, skip the analysis and update the source entry in the codeql_sqlite database indicating it is not relevant.
59+
Analyze what the source endpoint is used for.
6660
If it is a web endpoint, identify the routing path that reaches this source, HTTP method,
6761
any middlewares used, which roles are allowed to call it.
6862
Note which kind of authentication is required for that endpoint.
6963
It is possible that the source does not require any authentication.
7064
If authorization is required, note the details.
71-
Analyze the code and identify if this source could lead to a security vulnerability.
7265
7366
Update the source entry in the codeql_sqlite database with your findings.
7467
## IMPORTANT: General Guidance that ALWAYS applies
@@ -82,7 +75,8 @@ taskflow:
8275
the error you encountered.
8376
toolboxes:
8477
- seclab_taskflows.toolboxes.codeql_python
85-
- seclab_taskflow_agent.toolboxes.memcache
78+
- seclab_taskflows.toolboxes.local_gh_resources
79+
- seclab_taskflows.toolboxes.local_file_viewer
8680
- task:
8781
must_complete: true
8882
agents:

src/seclab_taskflows/toolboxes/codeql_python.yaml

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -46,34 +46,3 @@ server_prompt: |
4646
If you are unable to determine the appropriate programming language acronym,
4747
halt your task and ask the user to clarify which programming language the
4848
CodeQL database in question was created for.
49-
50-
51-
### CodeQL Database File URI
52-
53-
The CodeQL database file URI scheme is defined as `file://path` and
54-
`file://path:region`.
55-
56-
Examples of CodeQL database file URIs:
57-
58-
- `file:///path/to/file:1:2:3:4`
59-
- `file:///path/to/file`
60-
61-
File URIs optionally contain a region definition that looks like
62-
`start_line:start_column:end_line:end_column` which will limit the contents
63-
returned to the specified region. For example `file:///path/to/file:1:2:3:4`
64-
indicates a file region of `1:2:3:4` which would return the content of the
65-
file starting at line 1, column 2 and ending at line 3, column 4. These line
66-
and column indices are 1-based, meaning line and column values start at 1.
67-
68-
If the region is omitted the full contents of the file will be returned,
69-
for example `file:///path/to/file` returns the full contents of
70-
`/path/to/file`.
71-
72-
If you want to fetch a specific region by their line numbers only, you can set
73-
the `start_column` and `end_column` values of a region to `0`. For example to
74-
retrieve lines 1-4 from a file at `/path/to/file` you can use a file URI
75-
with a region definition such as: `file:///path/to/file:1:0:4:0`.
76-
77-
When unsure how to fetch a specific region, fall back to fetching the full file
78-
contents for a file by omitting the region definition, for example
79-
`file:///path/to/file`

0 commit comments

Comments
 (0)