From f2c6b2944b207aa4b19dbce6ed59375eded9258d Mon Sep 17 00:00:00 2001 From: ObserverOfTime Date: Sun, 14 Dec 2025 13:22:04 +0200 Subject: [PATCH 1/6] build: update core library & test parsers --- .gitmodules | 2 +- docs/conf.py | 2 +- pyproject.toml | 16 ++++++++++------ tree_sitter/core | 2 +- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/.gitmodules b/.gitmodules index 4b7eed30..86301e82 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "tree-sitter"] url = https://github.com/tree-sitter/tree-sitter path = tree_sitter/core - branch = release-0.25 + branch = release-0.26 diff --git a/docs/conf.py b/docs/conf.py index c8ec05bd..b2fba165 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,7 +22,7 @@ } master_doc = "index" language = "en" -needs_sphinx = "8.1" +needs_sphinx = "8.2" templates_path = ["_templates"] intersphinx_mapping = { diff --git a/pyproject.toml b/pyproject.toml index fdb2c042..a485a75b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,14 +31,18 @@ Matrix = "https://matrix.to/#/#tree-sitter-chat:matrix.org" name = "Max Brunsfeld" email = "maxbrunsfeld@gmail.com" +[[project.maintainers]] +name = "ObserverOfTime" +email = "chronobserver@disroot.org" + [project.optional-dependencies] -docs = ["sphinx~=8.1", "sphinx-book-theme"] +docs = ["sphinx~=8.2", "sphinx-book-theme"] tests = [ - "tree-sitter-html>=0.23.2", - "tree-sitter-javascript>=0.23.1", - "tree-sitter-json>=0.24.8", - "tree-sitter-python>=0.23.6", - "tree-sitter-rust>=0.23.2", + "tree-sitter-html==0.23.2", + "tree-sitter-javascript==0.25.0", + "tree-sitter-json==0.24.8", + "tree-sitter-python==0.25.0", + "tree-sitter-rust==0.24.0", ] [tool.ruff] diff --git a/tree_sitter/core b/tree_sitter/core index da6fe9be..cd4b6e2e 160000 --- a/tree_sitter/core +++ b/tree_sitter/core @@ -1 +1 @@ -Subproject commit da6fe9beb4f7f67beb75914ca8e0d48ae48d6406 +Subproject commit cd4b6e2ef996d4baca12caadb78dffc8b55bc869 From 21ab52436bc098eebcbaa531afb2493db82d946c Mon 
Sep 17 00:00:00 2001 From: ObserverOfTime Date: Sun, 14 Dec 2025 13:41:36 +0200 Subject: [PATCH 2/6] feat!: remove deprecated methods --- docs/classes/tree_sitter.Language.rst | 18 -------- .../classes/tree_sitter.LookaheadIterator.rst | 14 ------- docs/classes/tree_sitter.Node.rst | 4 -- docs/classes/tree_sitter.Parser.rst | 8 ---- docs/classes/tree_sitter.Query.rst | 8 ---- docs/classes/tree_sitter.QueryCursor.rst | 5 +-- tree_sitter/__init__.pyi | 41 ------------------ tree_sitter/binding/language.c | 34 --------------- tree_sitter/binding/parser.c | 39 ++--------------- tree_sitter/binding/query_cursor.c | 42 ++----------------- 10 files changed, 9 insertions(+), 204 deletions(-) diff --git a/docs/classes/tree_sitter.Language.rst b/docs/classes/tree_sitter.Language.rst index 4ec44b1a..3eddea4b 100644 --- a/docs/classes/tree_sitter.Language.rst +++ b/docs/classes/tree_sitter.Language.rst @@ -16,14 +16,8 @@ Language .. automethod:: node_kind_is_named .. automethod:: node_kind_is_supertype .. automethod:: node_kind_is_visible - .. automethod:: query - - .. deprecated:: 0.25.0 - Use the :class:`Query` constructor instead. .. automethod:: subtypes - .. versionadded:: 0.25.0 - Special Methods --------------- @@ -41,21 +35,9 @@ Language ---------- .. autoattribute:: abi_version - - .. versionadded:: 0.25.0 .. autoattribute:: field_count .. autoattribute:: name - - .. versionadded:: 0.25.0 .. autoattribute:: node_kind_count .. autoattribute:: parse_state_count .. autoattribute:: semantic_version - - .. versionadded:: 0.25.0 .. autoattribute:: supertypes - - .. versionadded:: 0.25.0 - .. autoattribute:: version - - .. deprecated:: 0.25.0 - Use :attr:`abi_version` instead. diff --git a/docs/classes/tree_sitter.LookaheadIterator.rst b/docs/classes/tree_sitter.LookaheadIterator.rst index fc5cd302..86c0005a 100644 --- a/docs/classes/tree_sitter.LookaheadIterator.rst +++ b/docs/classes/tree_sitter.LookaheadIterator.rst @@ -8,29 +8,15 @@ LookaheadIterator ------- .. 
automethod:: names - - .. versionadded:: 0.25.0 - Replaces the ``iter_names`` method .. automethod:: reset - - .. versionadded:: 0.25.0 - Replaces the ``reset_state`` method .. automethod:: symbols - .. versionadded:: 0.25.0 - Special Methods --------------- .. automethod:: __iter__ - - .. versionchanged:: 0.25.0 - Iterates over ``tuple[int, str]`` .. automethod:: __next__ - .. versionchanged:: 0.25.0 - Yields ``tuple[int, str]`` - Attributes ---------- diff --git a/docs/classes/tree_sitter.Node.rst b/docs/classes/tree_sitter.Node.rst index 65f28e8a..d029e13e 100644 --- a/docs/classes/tree_sitter.Node.rst +++ b/docs/classes/tree_sitter.Node.rst @@ -18,11 +18,7 @@ Node .. automethod:: field_name_for_child .. automethod:: field_name_for_named_child .. automethod:: first_child_for_byte - - .. versionadded:: 0.25.0 .. automethod:: first_named_child_for_byte - - .. versionadded:: 0.25.0 .. automethod:: named_child .. automethod:: named_descendant_for_byte_range .. automethod:: named_descendant_for_point_range diff --git a/docs/classes/tree_sitter.Parser.rst b/docs/classes/tree_sitter.Parser.rst index 5a7388c7..dcabf6f0 100644 --- a/docs/classes/tree_sitter.Parser.rst +++ b/docs/classes/tree_sitter.Parser.rst @@ -7,10 +7,6 @@ Parser ------- .. automethod:: parse - - .. versionchanged:: 0.25.0 - * ``encoding`` can be one of ``"utf8", "utf16", "utf16le", "utf16be"``. - * ``progress_callback`` parameter added. .. automethod:: print_dot_graphs .. automethod:: reset @@ -20,7 +16,3 @@ Parser .. autoattribute:: included_ranges .. autoattribute:: language .. autoattribute:: logger - .. autoattribute:: timeout_micros - - .. deprecated:: 0.25.0 - Use the ``progress_callback`` in :meth:`parse`. diff --git a/docs/classes/tree_sitter.Query.rst b/docs/classes/tree_sitter.Query.rst index 3096b62c..6895f863 100644 --- a/docs/classes/tree_sitter.Query.rst +++ b/docs/classes/tree_sitter.Query.rst @@ -21,11 +21,7 @@ Query ------- .. automethod:: capture_name - - .. versionadded:: 0.25.0 .. 
automethod:: capture_quantifier - - .. versionadded:: 0.25.0 .. automethod:: disable_capture .. automethod:: disable_pattern .. automethod:: end_byte_for_pattern @@ -37,13 +33,9 @@ Query .. automethod:: start_byte_for_pattern .. automethod:: string_value - .. versionadded:: 0.25.0 - Attributes ---------- .. autoattribute:: capture_count .. autoattribute:: pattern_count .. autoattribute:: string_count - - .. versionadded:: 0.25.0 diff --git a/docs/classes/tree_sitter.QueryCursor.rst b/docs/classes/tree_sitter.QueryCursor.rst index 336d8ef2..53289c71 100644 --- a/docs/classes/tree_sitter.QueryCursor.rst +++ b/docs/classes/tree_sitter.QueryCursor.rst @@ -1,10 +1,8 @@ QueryCursor -===== +=========== .. autoclass:: tree_sitter.QueryCursor - .. versionadded:: 0.25.0 - Methods ------- @@ -19,4 +17,3 @@ QueryCursor .. autoattribute:: did_exceed_match_limit .. autoattribute:: match_limit - .. autoattribute:: timeout_micros diff --git a/tree_sitter/__init__.pyi b/tree_sitter/__init__.pyi index f68eac62..d4b58696 100644 --- a/tree_sitter/__init__.pyi +++ b/tree_sitter/__init__.pyi @@ -27,9 +27,6 @@ class Language: def abi_version(self) -> int: ... @property def semantic_version(self) -> tuple[int, int, int] | None: ... - @deprecated("Use abi_version instead") - @property - def version(self) -> int: ... @property def node_kind_count(self) -> int: ... @property @@ -48,8 +45,6 @@ class Language: def field_id_for_name(self, name: str, /) -> int | None: ... def next_state(self, state: int, id: int, /) -> int: ... def lookahead_iterator(self, state: int, /) -> LookaheadIterator | None: ... - @deprecated("Use the Query() constructor instead") - def query(self, source: str, /) -> Query: ... def copy(self) -> Language: ... def __repr__(self) -> str: ... def __eq__(self, other: Any, /) -> bool: ... 
@@ -226,22 +221,11 @@ class TreeCursor: @final class Parser: - @overload - def __init__( - self, - language: Language | None = None, - *, - included_ranges: Sequence[Range] | None = None, - logger: Callable[[LogType, str], None] | None = None, - ) -> None: ... - @deprecated("timeout_micros is deprecated") - @overload def __init__( self, language: Language | None = None, *, included_ranges: Sequence[Range] | None = None, - timeout_micros: int | None = None, logger: Callable[[LogType, str], None] | None = None, ) -> None: ... @property @@ -256,15 +240,6 @@ class Parser: def included_ranges(self, ranges: Sequence[Range]) -> None: ... @included_ranges.deleter def included_ranges(self) -> None: ... - @deprecated("Use the progress_callback in parse()") - @property - def timeout_micros(self) -> int: ... - @deprecated("Use the progress_callback in parse()") - @timeout_micros.setter - def timeout_micros(self, timeout: int) -> None: ... - @deprecated("Use the progress_callback in parse()") - @timeout_micros.deleter - def timeout_micros(self) -> None: ... @property def logger(self) -> Callable[[LogType, str], None] | None: ... @logger.setter @@ -328,29 +303,13 @@ class Query: @final class QueryCursor: - @overload def __init__(self, query: Query, *, match_limit: int = 0xFFFFFFFF) -> None: ... - @deprecated("timeout_micros is deprecated") - @overload - def __init__( - self, - query: Query, - *, - match_limit: int = 0xFFFFFFFF, - timeout_micros: int = 0 - ) -> None: ... @property def match_limit(self) -> int: ... @match_limit.setter def match_limit(self, limit: int) -> None: ... @match_limit.deleter def match_limit(self) -> None: ... - @deprecated("Use the progress_callback in matches() or captures()") - @property - def timeout_micros(self) -> int: ... - @deprecated("Use the progress_callback in matches() or captures()") - @timeout_micros.setter - def timeout_micros(self, timeout: int) -> None: ... @property def did_exceed_match_limit(self) -> bool: ... 
def set_max_start_depth(self, depth: int, /) -> None: ... diff --git a/tree_sitter/binding/language.c b/tree_sitter/binding/language.c index 32892860..03ff0099 100644 --- a/tree_sitter/binding/language.c +++ b/tree_sitter/binding/language.c @@ -63,13 +63,6 @@ PyObject *language_get_name(Language *self, void *Py_UNUSED(payload)) { return PyUnicode_FromString(self->name); } -PyObject *language_get_version(Language *self, void *Py_UNUSED(payload)) { - if (REPLACE("version", "abi_version") < 0) { - return NULL; - } - return PyLong_FromUnsignedLong(self->abi_version); -} - PyObject *language_get_abi_version(Language *self, void *Py_UNUSED(payload)) { return PyLong_FromUnsignedLong(self->abi_version); } @@ -237,19 +230,6 @@ PyObject *language_lookahead_iterator(Language *self, PyObject *args) { return PyObject_Init((PyObject *)iter, state->lookahead_iterator_type); } -PyObject *language_query(Language *self, PyObject *args) { - ModuleState *state = GET_MODULE_STATE(self); - char *source; - Py_ssize_t length; - if (!PyArg_ParseTuple(args, "s#:query", &source, &length)) { - return NULL; - } - if (REPLACE("query()", "the Query() constructor") < 0) { - return NULL; - } - return PyObject_CallFunction((PyObject *)state->query_type, "Os#", self, source, length); -} - PyObject *language_copy(Language *self, PyObject *Py_UNUSED(args)) { ModuleState *state = GET_MODULE_STATE(self); Language *copied = PyObject_New(Language, state->language_type); @@ -290,10 +270,6 @@ PyDoc_STRVAR(language_next_state_doc, PyDoc_STRVAR(language_lookahead_iterator_doc, "lookahead_iterator(self, state, /)\n--\n\n" "Create a new :class:`LookaheadIterator` for this language and parse state."); -PyDoc_STRVAR( - language_query_doc, - "query(self, source, /)\n--\n\n" - "Create a new :class:`Query` from a string containing one or more S-expression patterns."); PyDoc_STRVAR(language_copy_doc, "copy(self, /)\n--\n\n" "Create a copy of the language."); PyDoc_STRVAR(language_copy2_doc, "__copy__(self, 
/)\n--\n\n" @@ -360,12 +336,6 @@ static PyMethodDef language_methods[] = { .ml_flags = METH_VARARGS, .ml_doc = language_lookahead_iterator_doc, }, - { - .ml_name = "query", - .ml_meth = (PyCFunction)language_query, - .ml_flags = METH_VARARGS, - .ml_doc = language_query_doc, - }, { .ml_name = "copy", .ml_meth = (PyCFunction)language_copy, @@ -381,10 +351,6 @@ static PyMethodDef language_methods[] = { static PyGetSetDef language_accessors[] = { {"name", (getter)language_get_name, NULL, PyDoc_STR("The name of the language."), NULL}, - {"version", (getter)language_get_version, NULL, - PyDoc_STR("The ABI version number that indicates which version of " - "the Tree-sitter CLI was used to generate this language."), - NULL}, {"abi_version", (getter)language_get_abi_version, NULL, PyDoc_STR("The ABI version number that indicates which version of " "the Tree-sitter CLI was used to generate this language."), diff --git a/tree_sitter/binding/parser.c b/tree_sitter/binding/parser.c index c7457f01..f0ee4490 100644 --- a/tree_sitter/binding/parser.c +++ b/tree_sitter/binding/parser.c @@ -230,31 +230,6 @@ PyObject *parser_print_dot_graphs(Parser *self, PyObject *arg) { Py_RETURN_NONE; } -PyObject *parser_get_timeout_micros(Parser *self, void *Py_UNUSED(payload)) { - if (DEPRECATE("Use the progress_callback in parse()") < 0) { - return NULL; - } - return PyLong_FromUnsignedLong(ts_parser_timeout_micros(self->parser)); -} - -int parser_set_timeout_micros(Parser *self, PyObject *arg, void *Py_UNUSED(payload)) { - if (DEPRECATE("Use the progress_callback in parse()") < 0) { - return -1; - } - if (arg == NULL || arg == Py_None) { - ts_parser_set_timeout_micros(self->parser, 0); - return 0; - } - if (!PyLong_Check(arg)) { - PyErr_Format(PyExc_TypeError, "'timeout_micros' must be assigned an int, not %s", - arg->ob_type->tp_name); - return -1; - } - - ts_parser_set_timeout_micros(self->parser, PyLong_AsSize_t(arg)); - return 0; -} - PyObject *parser_get_included_ranges(Parser *self, void 
*Py_UNUSED(payload)) { uint32_t count; const TSRange *ranges = ts_parser_included_ranges(self->parser, &count); @@ -397,11 +372,10 @@ int parser_set_language(Parser *self, PyObject *arg, void *Py_UNUSED(payload)) { int parser_init(Parser *self, PyObject *args, PyObject *kwargs) { ModuleState *state = GET_MODULE_STATE(self); - PyObject *language = NULL, *included_ranges = NULL, *timeout_micros = NULL, *logger = NULL; - char *keywords[] = {"language", "included_ranges", "timeout_micros", "logger", NULL}; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!$OOO:__init__", keywords, - state->language_type, &language, &included_ranges, - &timeout_micros, &logger)) { + PyObject *language = NULL, *included_ranges = NULL, *logger = NULL; + char *keywords[] = {"language", "included_ranges", "logger", NULL}; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!$OO:__init__", keywords, + state->language_type, &language, &included_ranges, &logger)) { return -1; } @@ -411,9 +385,6 @@ int parser_init(Parser *self, PyObject *args, PyObject *kwargs) { if (SET_ATTRIBUTE_ERROR(included_ranges)) { return -1; } - if (SET_ATTRIBUTE_ERROR(timeout_micros)) { - return -1; - } if (SET_ATTRIBUTE_ERROR(logger)) { return -1; } @@ -469,8 +440,6 @@ static PyGetSetDef parser_accessors[] = { PyDoc_STR("The language that will be used for parsing."), NULL}, {"included_ranges", (getter)parser_get_included_ranges, (setter)parser_set_included_ranges, PyDoc_STR("The ranges of text that the parser will include when parsing."), NULL}, - {"timeout_micros", (getter)parser_get_timeout_micros, (setter)parser_set_timeout_micros, - PyDoc_STR("The duration in microseconds that parsing is allowed to take."), NULL}, {"logger", (getter)parser_get_logger, (setter)parser_set_logger, PyDoc_STR("The logger that the parser should use during parsing."), NULL}, {NULL}, diff --git a/tree_sitter/binding/query_cursor.c b/tree_sitter/binding/query_cursor.c index 0e1a08a9..a49e46a2 100644 --- 
a/tree_sitter/binding/query_cursor.c +++ b/tree_sitter/binding/query_cursor.c @@ -1,7 +1,5 @@ #include "types.h" -#include - PyObject *node_new_internal(ModuleState *state, TSNode node, PyObject *tree); bool query_satisfies_predicates(Query *query, TSQueryMatch match, Tree *tree, PyObject *callable); @@ -25,16 +23,14 @@ int query_cursor_init(QueryCursor *self, PyObject *args, PyObject *kwargs) { ModuleState *state = GET_MODULE_STATE(self); PyObject *query = NULL; uint32_t match_limit = UINT32_MAX; - uint64_t timeout_micros = 0; - char *keywords[] = {"query", "match_limit", "timeout_micros", NULL}; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!|$II:__init__", keywords, state->query_type, - &query, &match_limit, &timeout_micros)) { + char *keywords[] = {"query", "match_limit", NULL}; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!|$I:__init__", keywords, state->query_type, + &query, &match_limit)) { return -1; } self->query = Py_NewRef(query); ts_query_cursor_set_match_limit(self->cursor, match_limit); - ts_query_cursor_set_timeout_micros(self->cursor, timeout_micros); return 0; } @@ -237,32 +233,7 @@ int query_cursor_set_match_limit(QueryCursor *self, PyObject *arg, void *Py_UNUS return -1; } - ts_query_cursor_set_timeout_micros(self->cursor, PyLong_AsSize_t(arg)); - return 0; -} - -PyObject *query_cursor_get_timeout_micros(QueryCursor *self, void *Py_UNUSED(payload)) { - if (DEPRECATE("Use the progress_callback in matches() or captures()") < 0) { - return NULL; - } - return PyLong_FromUnsignedLong(ts_query_cursor_timeout_micros(self->cursor)); -} - -int query_cursor_set_timeout_micros(QueryCursor *self, PyObject *arg, void *Py_UNUSED(payload)) { - if (DEPRECATE("Use the progress_callback in matches() or captures()") < 0) { - return -1; - } - if (arg == NULL || arg == Py_None) { - ts_query_cursor_set_timeout_micros(self->cursor, 0); - return 0; - } - if (!PyLong_Check(arg)) { - PyErr_Format(PyExc_TypeError, "'timeout_micros' must be assigned an int, not 
%s", - arg->ob_type->tp_name); - return -1; - } - - ts_query_cursor_set_timeout_micros(self->cursor, PyLong_AsSize_t(arg)); + ts_query_cursor_set_match_limit(self->cursor, PyLong_AsSize_t(arg)); return 0; } @@ -330,11 +301,6 @@ static PyMethodDef query_cursor_methods[] = { }; static PyGetSetDef query_cursor_accessors[] = { - {"timeout_micros", (getter)query_cursor_get_timeout_micros, - (setter)query_cursor_set_timeout_micros, - PyDoc_STR("The maximum duration in microseconds that query " - "execution should be allowed to take before halting."), - NULL}, {"match_limit", (getter)query_cursor_get_match_limit, (setter)query_cursor_set_match_limit, PyDoc_STR("The maximum number of in-progress matches."), NULL}, {"did_exceed_match_limit", (getter)query_cursor_get_did_exceed_match_limit, NULL, From 6d39f3691467a7a356c21a5eee6426596af79e49 Mon Sep 17 00:00:00 2001 From: ObserverOfTime Date: Sun, 14 Dec 2025 14:22:07 +0200 Subject: [PATCH 3/6] feat(query_cursor): add containing range methods - set_containing_byte_range - set_containing_point_range --- docs/classes/tree_sitter.QueryCursor.rst | 6 +++ tree_sitter/__init__.pyi | 7 +++ tree_sitter/binding/query_cursor.c | 56 +++++++++++++++++++++++- 3 files changed, 67 insertions(+), 2 deletions(-) diff --git a/docs/classes/tree_sitter.QueryCursor.rst b/docs/classes/tree_sitter.QueryCursor.rst index 53289c71..aa27a736 100644 --- a/docs/classes/tree_sitter.QueryCursor.rst +++ b/docs/classes/tree_sitter.QueryCursor.rst @@ -9,6 +9,12 @@ QueryCursor .. automethod:: captures .. automethod:: matches .. automethod:: set_byte_range + .. automethod:: set_containing_byte_range + + .. versionadded:: 0.26.0 + .. automethod:: set_containing_point_range + + .. versionadded:: 0.26.0 .. automethod:: set_max_start_depth .. 
automethod:: set_point_range diff --git a/tree_sitter/__init__.pyi b/tree_sitter/__init__.pyi index d4b58696..262bb924 100644 --- a/tree_sitter/__init__.pyi +++ b/tree_sitter/__init__.pyi @@ -314,12 +314,19 @@ class QueryCursor: def did_exceed_match_limit(self) -> bool: ... def set_max_start_depth(self, depth: int, /) -> None: ... def set_byte_range(self, start: int, end: int, /) -> None: ... + def set_containing_byte_range(self, start: int, end: int, /) -> None: ... def set_point_range( self, start: Point | tuple[int, int], end: Point | tuple[int, int], /, ) -> None: ... + def set_containing_point_range( + self, + start: Point | tuple[int, int], + end: Point | tuple[int, int], + /, + ) -> None: ... def captures( self, node: Node, diff --git a/tree_sitter/binding/query_cursor.c b/tree_sitter/binding/query_cursor.c index a49e46a2..e02be554 100644 --- a/tree_sitter/binding/query_cursor.c +++ b/tree_sitter/binding/query_cursor.c @@ -56,6 +56,18 @@ PyObject *query_cursor_set_byte_range(QueryCursor *self, PyObject *args) { return Py_NewRef(self); } +PyObject *query_cursor_set_containing_byte_range(QueryCursor *self, PyObject *args) { + uint32_t start_byte, end_byte; + if (!PyArg_ParseTuple(args, "II:set_containing_byte_range", &start_byte, &end_byte)) { + return NULL; + } + if (!ts_query_cursor_set_containing_byte_range(self->cursor, start_byte, end_byte)) { + PyErr_SetString(PyExc_ValueError, "Invalid byte range"); + return NULL; + } + return Py_NewRef(self); +} + PyObject *query_cursor_set_point_range(QueryCursor *self, PyObject *args) { TSPoint start_point, end_point; if (!PyArg_ParseTuple(args, "(II)(II):set_point_range", &start_point.row, &start_point.column, @@ -69,6 +81,19 @@ PyObject *query_cursor_set_point_range(QueryCursor *self, PyObject *args) { return Py_NewRef(self); } +PyObject *query_cursor_set_containing_point_range(QueryCursor *self, PyObject *args) { + TSPoint start_point, end_point; + if (!PyArg_ParseTuple(args, "(II)(II):set_containing_point_range", 
&start_point.row, + &start_point.column, &end_point.row, &end_point.column)) { + return NULL; + } + if (!ts_query_cursor_set_containing_point_range(self->cursor, start_point, end_point)) { + PyErr_SetString(PyExc_ValueError, "Invalid point range"); + return NULL; + } + return Py_NewRef(self); +} + static bool query_cursor_progress_callback(TSQueryCursorState *state) { PyObject *result = PyObject_CallFunction((PyObject *)state->payload, "I", state->current_byte_offset); @@ -247,6 +272,14 @@ PyDoc_STRVAR(query_cursor_set_byte_range_doc, "The query cursor will return matches that intersect with the given byte range. " "This means that a match may be returned even if some of its captures fall outside " "the specified range, as long as at least part of the match overlaps with it."); +PyDoc_STRVAR(query_cursor_set_containing_byte_range_doc, + "set_containing_byte_range(self, start, end)\n--\n\n" + "Set the byte range within which all matches must be fully contained." DOC_RAISES + "ValueError\n\n If the start byte exceeds the end byte." DOC_NOTE + "In contrast to :meth:`set_byte_range`, this will restrict the query cursor to only " + "return matches where *all* nodes are *fully* contained within the given range.\n" + "Both methods can be used together, e.g. to search for any matches that intersect " + "line 5000, as long as they are fully contained within lines 4500-5500"); PyDoc_STRVAR(query_cursor_set_point_range_doc, "set_point_range(self, start, end)\n--\n\n" "Set the range of points in which the query will be executed." DOC_RAISES @@ -254,6 +287,14 @@ PyDoc_STRVAR(query_cursor_set_point_range_doc, "The query cursor will return matches that intersect with the given point range. 
" "This means that a match may be returned even if some of its captures fall outside " "the specified range, as long as at least part of the match overlaps with it."); +PyDoc_STRVAR(query_cursor_set_containing_point_range_doc, + "set_containing_point_range(self, start, end)\n--\n\n" + "Set the point range within which all matches must be fully contained." DOC_RAISES + "ValueError\n\n If the start point exceeds the end point." DOC_NOTE + "In contrast to :meth:`set_point_range`, this will restrict the query cursor to only " + "return matches where *all* nodes are *fully* contained within the given range.\n" + "Both methods can be used together, e.g. to search for any matches that intersect " + "line 5000, as long as they are fully contained within lines 4500-5500"); PyDoc_STRVAR(query_cursor_matches_doc, "matches(self, node, /, predicate=None, progress_callback=None)\n--\n\n" "Get a list of *matches* within the given node." DOC_RETURNS @@ -279,12 +320,24 @@ static PyMethodDef query_cursor_methods[] = { .ml_flags = METH_VARARGS, .ml_doc = query_cursor_set_byte_range_doc, }, + { + .ml_name = "set_containing_byte_range", + .ml_meth = (PyCFunction)query_cursor_set_containing_byte_range, + .ml_flags = METH_VARARGS, + .ml_doc = query_cursor_set_containing_byte_range_doc, + }, { .ml_name = "set_point_range", .ml_meth = (PyCFunction)query_cursor_set_point_range, .ml_flags = METH_VARARGS, .ml_doc = query_cursor_set_point_range_doc, }, + { + .ml_name = "set_containing_point_range", + .ml_meth = (PyCFunction)query_cursor_set_containing_point_range, + .ml_flags = METH_VARARGS, + .ml_doc = query_cursor_set_containing_point_range_doc, + }, { .ml_name = "matches", .ml_meth = (PyCFunction)query_cursor_matches, @@ -311,8 +364,7 @@ static PyGetSetDef query_cursor_accessors[] = { }; static PyType_Slot query_cursor_type_slots[] = { - {Py_tp_doc, - PyDoc_STR("A class for executing a :class:`Query` on a syntax :class:`Tree`.")}, + {Py_tp_doc, PyDoc_STR("A class for executing a 
:class:`Query` on a syntax :class:`Tree`.")}, {Py_tp_new, query_cursor_new}, {Py_tp_init, query_cursor_init}, {Py_tp_dealloc, query_cursor_dealloc}, From 69f14cb6d2c0b2d83800cb57eadde2d6f9d371ea Mon Sep 17 00:00:00 2001 From: ObserverOfTime Date: Sun, 14 Dec 2025 15:32:10 +0200 Subject: [PATCH 4/6] feat!: add edit method to point & range The Point class is now a tuple subclass instead of a namedtuple --- docs/classes/tree_sitter.Point.rst | 12 +++ docs/classes/tree_sitter.Range.rst | 7 ++ docs/conf.py | 4 +- setup.py | 1 + tests/test_parser.py | 5 +- tree_sitter/__init__.py | 4 - tree_sitter/__init__.pyi | 33 ++++++-- tree_sitter/binding/module.c | 20 +---- tree_sitter/binding/node.c | 10 ++- tree_sitter/binding/parser.c | 4 +- tree_sitter/binding/point.c | 117 +++++++++++++++++++++++++++++ tree_sitter/binding/range.c | 53 ++++++++++++- tree_sitter/binding/tree.c | 37 ++++----- tree_sitter/binding/types.h | 5 +- 14 files changed, 256 insertions(+), 56 deletions(-) create mode 100644 tree_sitter/binding/point.c diff --git a/docs/classes/tree_sitter.Point.rst b/docs/classes/tree_sitter.Point.rst index 5e1f772c..b4fc6715 100644 --- a/docs/classes/tree_sitter.Point.rst +++ b/docs/classes/tree_sitter.Point.rst @@ -4,6 +4,18 @@ Point .. autoclass:: tree_sitter.Point :show-inheritance: + Methods + ------- + + .. automethod:: edit + + .. versionadded:: 0.26.0 + + Special Methods + --------------- + + .. automethod:: __repr__ + Attributes ---------- diff --git a/docs/classes/tree_sitter.Range.rst b/docs/classes/tree_sitter.Range.rst index 03185fe7..533d1bbb 100644 --- a/docs/classes/tree_sitter.Range.rst +++ b/docs/classes/tree_sitter.Range.rst @@ -3,6 +3,13 @@ Range .. autoclass:: tree_sitter.Range + Methods + ------- + + .. automethod:: edit + + .. 
versionadded:: 0.26.0 + Special Methods --------------- diff --git a/docs/conf.py b/docs/conf.py index b2fba165..2fe0c26a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -68,6 +68,8 @@ def process_signature(_app, _what, name, _obj, _options, _signature, return_anno return "(query, *, match_limit=None, timeout_micros=None)", return_annotation if name == "tree_sitter.Parser": return "(language, *, included_ranges=None, timeout_micros=None)", return_annotation + if name == "tree_sitter.Point": + return "(row, column)", return_annotation if name == "tree_sitter.Range": return "(start_point, end_point, start_byte, end_byte)", return_annotation if name == "tree_sitter.QueryPredicate": @@ -88,7 +90,7 @@ def process_docstring(_app, what, name, _obj, _options, lines): def process_bases(_app, name, _obj, _options, bases): if name == "tree_sitter.Point": - bases[-1] = ":class:`~typing.NamedTuple`" + bases[-1] = ":class:`tuple`" if name == "tree_sitter.LogType": bases[-1] = ":class:`~enum.IntEnum`" if name == "tree_sitter.LookaheadIterator": diff --git a/setup.py b/setup.py index 7a921fed..a6712dfb 100644 --- a/setup.py +++ b/setup.py @@ -39,6 +39,7 @@ def build_extension(self, ext: Extension): "tree_sitter/binding/lookahead_iterator.c", "tree_sitter/binding/node.c", "tree_sitter/binding/parser.c", + "tree_sitter/binding/point.c", "tree_sitter/binding/query.c", "tree_sitter/binding/query_cursor.c", "tree_sitter/binding/query_predicates.c", diff --git a/tests/test_parser.py b/tests/test_parser.py index b2a1910f..cd966bd0 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -469,6 +469,9 @@ def test_parsing_with_a_newly_included_range(self): def test_logging(self): from logging import getLogger + parse_logger = getLogger("tree_sitter.PARSE") + lex_logger = getLogger("tree_sitter.LEX") + def logger(log_type: LogType, message: str): match log_type: case LogType.PARSE: @@ -476,8 +479,6 @@ def logger(log_type: LogType, message: str): case LogType.LEX: 
lex_logger.info(message) - parse_logger = getLogger("tree_sitter.PARSE") - lex_logger = getLogger("tree_sitter.LEX") parser = Parser(self.python, logger=logger) with self.assertLogs("tree_sitter") as logs: parser.parse(b"foo") diff --git a/tree_sitter/__init__.py b/tree_sitter/__init__.py index a3fbe847..44b3c7da 100644 --- a/tree_sitter/__init__.py +++ b/tree_sitter/__init__.py @@ -21,10 +21,6 @@ LogType.__doc__ = "The type of a log message." -Point.__doc__ = "A position in a multi-line text document, in terms of rows and columns." -Point.row.__doc__ = "The zero-based row of the document." -Point.column.__doc__ = "The zero-based column of the document." - class QueryPredicate(_Protocol): """A custom query predicate that runs on a pattern.""" diff --git a/tree_sitter/__init__.pyi b/tree_sitter/__init__.pyi index 262bb924..10242a90 100644 --- a/tree_sitter/__init__.pyi +++ b/tree_sitter/__init__.pyi @@ -1,15 +1,11 @@ from enum import IntEnum from collections.abc import ByteString, Callable, Iterator, Sequence -from typing import Annotated, Any, Final, Literal, NamedTuple, Protocol, Self, final, overload +from typing import Annotated, Any, Final, Literal, Protocol, Self, final, overload from typing_extensions import deprecated class _SupportsFileno(Protocol): def fileno(self) -> int: ... -class Point(NamedTuple): - row: int - column: int - class LogType(IntEnum): PARSE: int LEX: int @@ -355,6 +351,24 @@ class LookaheadIterator(Iterator[tuple[int, str]]): def symbols(self) -> list[int]: ... def __next__(self) -> tuple[int, str]: ... +@final +class Point(tuple[int, int]): + def __new__(cls, row: int, column: int) -> Self: ... + @property + def row(self) -> int: ... + @property + def column(self) -> int: ... + def edit( + self, + start_byte: int, + old_end_byte: int, + new_end_byte: int, + start_point: Point | tuple[int, int], + old_end_point: Point | tuple[int, int], + new_end_point: Point | tuple[int, int], + ) -> tuple[Point, int]: ... 
+ def __repr__(self) -> str: ... + @final class Range: def __init__( @@ -372,6 +386,15 @@ class Range: def start_byte(self) -> int: ... @property def end_byte(self) -> int: ... + def edit( + self, + start_byte: int, + old_end_byte: int, + new_end_byte: int, + start_point: Point | tuple[int, int], + old_end_point: Point | tuple[int, int], + new_end_point: Point | tuple[int, int], + ) -> None: ... def __eq__(self, other: Any, /) -> bool: ... def __ne__(self, other: Any, /) -> bool: ... def __repr__(self) -> str: ... diff --git a/tree_sitter/binding/module.c b/tree_sitter/binding/module.c index ab9990a7..4f01f683 100644 --- a/tree_sitter/binding/module.c +++ b/tree_sitter/binding/module.c @@ -4,6 +4,7 @@ extern PyType_Spec language_type_spec; extern PyType_Spec lookahead_iterator_type_spec; extern PyType_Spec node_type_spec; extern PyType_Spec parser_type_spec; +extern PyType_Spec point_type_spec; extern PyType_Spec query_cursor_type_spec; extern PyType_Spec query_predicate_anyof_type_spec; extern PyType_Spec query_predicate_eq_capture_type_spec; @@ -71,6 +72,8 @@ PyMODINIT_FUNC PyInit__binding(void) { (PyTypeObject *)PyType_FromModuleAndSpec(module, &lookahead_iterator_type_spec, NULL); state->node_type = (PyTypeObject *)PyType_FromModuleAndSpec(module, &node_type_spec, NULL); state->parser_type = (PyTypeObject *)PyType_FromModuleAndSpec(module, &parser_type_spec, NULL); + state->point_type = (PyTypeObject *)PyType_FromModuleAndSpec(module, &point_type_spec, + (PyObject *)&PyTuple_Type); state->query_predicate_anyof_type = (PyTypeObject *)PyType_FromModuleAndSpec(module, &query_predicate_anyof_type_spec, NULL); state->query_predicate_eq_capture_type = (PyTypeObject *)PyType_FromModuleAndSpec( @@ -94,6 +97,7 @@ PyMODINIT_FUNC PyInit__binding(void) { (PyObject *)state->lookahead_iterator_type) < 0) || (PyModule_AddObjectRef(module, "Node", (PyObject *)state->node_type) < 0) || (PyModule_AddObjectRef(module, "Parser", (PyObject *)state->parser_type) < 0) || + 
(PyModule_AddObjectRef(module, "Point", (PyObject *)state->point_type) < 0) || (PyModule_AddObjectRef(module, "Query", (PyObject *)state->query_type) < 0) || (PyModule_AddObjectRef(module, "QueryCursor", (PyObject *)state->query_cursor_type) < 0) || (PyModule_AddObjectRef(module, "QueryPredicateAnyof", @@ -126,22 +130,6 @@ PyMODINIT_FUNC PyInit__binding(void) { goto cleanup; } - PyObject *namedtuple = import_attribute("collections", "namedtuple"); - if (namedtuple == NULL) { - goto cleanup; - } - PyObject *point_args = Py_BuildValue("s[ss]", "Point", "row", "column"); - PyObject *point_kwargs = PyDict_New(); - PyDict_SetItemString(point_kwargs, "module", PyUnicode_FromString("tree_sitter")); - state->point_type = (PyTypeObject *)PyObject_Call(namedtuple, point_args, point_kwargs); - Py_DECREF(point_args); - Py_DECREF(point_kwargs); - Py_DECREF(namedtuple); - if (state->point_type == NULL || - PyModule_AddObjectRef(module, "Point", (PyObject *)state->point_type) < 0) { - goto cleanup; - } - PyObject *int_enum = import_attribute("enum", "IntEnum"); if (int_enum == NULL) { goto cleanup; diff --git a/tree_sitter/binding/node.c b/tree_sitter/binding/node.c index b68fa879..04a5d34d 100644 --- a/tree_sitter/binding/node.c +++ b/tree_sitter/binding/node.c @@ -1,5 +1,7 @@ #include "types.h" +PyObject *point_new_internal(ModuleState *state, TSPoint point); + PyObject *node_new_internal(ModuleState *state, TSNode node, PyObject *tree) { Node *self = PyObject_New(Node, state->node_type); if (self == NULL) { @@ -71,7 +73,7 @@ PyObject *node_edit(Node *self, PyObject *args, PyObject *kwargs) { &old_end_byte, &new_end_byte, &start_row, &start_column, &old_end_row, &old_end_column, &new_end_row, &new_end_column)) { - Py_RETURN_NONE; + return NULL; } TSInputEdit edit = { @@ -427,12 +429,12 @@ PyObject *node_get_range(Node *self, void *Py_UNUSED(payload)) { PyObject *node_get_start_point(Node *self, void *Py_UNUSED(payload)) { TSPoint point = ts_node_start_point(self->node); - 
return POINT_NEW(GET_MODULE_STATE(self), point); + return point_new_internal(GET_MODULE_STATE(self), point); } PyObject *node_get_end_point(Node *self, void *Py_UNUSED(payload)) { TSPoint point = ts_node_end_point(self->node); - return POINT_NEW(GET_MODULE_STATE(self), point); + return point_new_internal(GET_MODULE_STATE(self), point); } PyObject *node_get_children(Node *self, void *Py_UNUSED(payload)) { @@ -603,7 +605,7 @@ PyObject *node_get_text(Node *self, void *Py_UNUSED(payload)) { Py_DECREF(collected_bytes); return NULL; } - PyObject *position_obj = POINT_NEW(GET_MODULE_STATE(self), current_point); + PyObject *position_obj = point_new_internal(GET_MODULE_STATE(self), current_point); if (!position_obj) { Py_DECREF(byte_offset_obj); Py_DECREF(collected_bytes); diff --git a/tree_sitter/binding/parser.c b/tree_sitter/binding/parser.c index f0ee4490..cf1394a2 100644 --- a/tree_sitter/binding/parser.c +++ b/tree_sitter/binding/parser.c @@ -1,5 +1,7 @@ #include "types.h" +PyObject *point_new_internal(ModuleState *state, TSPoint point); + #define SET_ATTRIBUTE_ERROR(name) \ (name != NULL && name != Py_None && parser_set_##name(self, name, NULL) < 0) @@ -57,7 +59,7 @@ static const char *parser_read_wrapper(void *payload, uint32_t byte_offset, TSPo // Form arguments to callable. 
PyObject *byte_offset_obj = PyLong_FromUnsignedLong(byte_offset);
-    PyObject *position_obj = POINT_NEW(wrapper_payload->state, position);
+    PyObject *position_obj = point_new_internal(wrapper_payload->state, position);
     if (!position_obj || !byte_offset_obj) {
         *bytes_read = 0;
         return NULL;
diff --git a/tree_sitter/binding/point.c b/tree_sitter/binding/point.c
new file mode 100644
index 00000000..65e82877
--- /dev/null
+++ b/tree_sitter/binding/point.c
@@ -0,0 +1,133 @@
+#include "types.h"
+
+// Build a Point of the given type through the tuple constructor, so that the instance is
+// allocated by the subtype itself and holds a strong reference to its heap type.
+// Returns a new reference, or NULL with an exception set.
+static PyObject *point_from_values(PyTypeObject *type, uint32_t row, uint32_t column) {
+    // tuple.__new__ takes a single iterable, hence the nested tuple
+    PyObject *tuple_args = Py_BuildValue("((II))", row, column);
+    if (tuple_args == NULL) {
+        return NULL;
+    }
+    PyObject *self = PyTuple_Type.tp_new(type, tuple_args, NULL);
+    Py_DECREF(tuple_args);
+    return self;
+}
+
+// Create a Point from a TSPoint; returns a new reference or NULL on error.
+PyObject *point_new_internal(ModuleState *state, TSPoint point) {
+    return point_from_values(state->point_type, point.row, point.column);
+}
+
+// Point.__new__(cls, row, column)
+PyObject *point_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) {
+    uint32_t row, column;
+    char *keywords[] = {"row", "column", NULL};
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "II:__new__", keywords, &row, &column)) {
+        return NULL;
+    }
+    return point_from_values(type, row, column);
+}
+
+// Point.__repr__
+PyObject *point_repr(PyObject *self) {
+    uint32_t row = PyLong_AsUnsignedLong(PyTuple_GET_ITEM(self, 0)),
+             column = PyLong_AsUnsignedLong(PyTuple_GET_ITEM(self, 1));
+    return PyUnicode_FromFormat("<Point row=%u, column=%u>", row, column);
+}
+
+// The tuple items are borrowed, but a getter must hand out a new reference.
+PyObject *point_get_row(PyObject *self, void *Py_UNUSED(payload)) {
+    return Py_NewRef(PyTuple_GET_ITEM(self, 0));
+}
+
+PyObject *point_get_column(PyObject *self, void *Py_UNUSED(payload)) {
+    return Py_NewRef(PyTuple_GET_ITEM(self, 1));
+}
+
+// Point.edit(...): returns a new (Point, start_byte) pair; the receiver is left unchanged.
+PyObject *point_edit(PyObject *self, PyObject *args, PyObject *kwargs) {
+    uint32_t start_byte, start_row, start_column;
+    uint32_t old_end_byte, old_end_row, old_end_column;
+    uint32_t new_end_byte, new_end_row, new_end_column;
+    char *keywords[] = {
+        "start_byte",    "old_end_byte",  "new_end_byte", "start_point",
+        "old_end_point", "new_end_point", NULL,
+    };
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "III(II)(II)(II):edit", keywords, &start_byte,
+                                     &old_end_byte, &new_end_byte, &start_row, &start_column,
+                                     &old_end_row, &old_end_column, &new_end_row,
+                                     &new_end_column)) {
+        return NULL;
+    }
+
+    // NOTE(review): passed to ts_point_edit without an initial value - confirm it is output-only
+    uint32_t new_start_byte;
+    uint32_t row = PyLong_AsUnsignedLong(PyTuple_GET_ITEM(self, 0)),
+             column = PyLong_AsUnsignedLong(PyTuple_GET_ITEM(self, 1));
+    TSPoint point = {row, column};
+    TSInputEdit edit = {
+        .start_byte = start_byte,
+        .old_end_byte = old_end_byte,
+        .new_end_byte = new_end_byte,
+        .start_point = {start_row, start_column},
+        .old_end_point = {old_end_row, old_end_column},
+        .new_end_point = {new_end_row, new_end_column},
+    };
+
+    ts_point_edit(&point, &new_start_byte, &edit);
+
+    PyObject *new_point = point_new_internal(GET_MODULE_STATE(self), point);
+    PyObject *new_byte = PyLong_FromUnsignedLong(new_start_byte);
+    PyObject *result = NULL;
+    if (new_point != NULL && new_byte != NULL) {
+        // PyTuple_Pack takes its own references, so ours are released below
+        result = PyTuple_Pack(2, new_point, new_byte);
+    }
+    Py_XDECREF(new_point);
+    Py_XDECREF(new_byte);
+    return result;
+}
+
+PyDoc_STRVAR(point_edit_doc,
+             "edit(self, /, start_byte, old_end_byte, new_end_byte, start_point, "
+             "old_end_point, new_end_point)\n--\n\n"
+             "Edit this point to keep it in-sync with source code that has been edited." DOC_RETURNS
+             "The edited point and its new start byte." DOC_TIP
+             "This is useful for editing points without requiring a tree or node instance.");
+
+static PyMethodDef point_methods[] = {
+    {
+        .ml_name = "edit",
+        .ml_meth = (PyCFunction)point_edit,
+        .ml_flags = METH_KEYWORDS | METH_VARARGS,
+        .ml_doc = point_edit_doc,
+    },
+    {NULL},
+};
+
+static PyGetSetDef point_accessors[] = {
+    {"row", (getter)point_get_row, NULL, PyDoc_STR("The zero-based row of the document."), NULL},
+    {"column", (getter)point_get_column, NULL,
+     PyDoc_STR("The zero-based column of the document."
DOC_NOTE "Measured in bytes."), NULL}, + {NULL}, +}; + +static PyType_Slot point_type_slots[] = { + {Py_tp_doc, + PyDoc_STR("A position in a multi-line text document, in terms of rows and columns.")}, + {Py_tp_new, point_new}, + {Py_tp_repr, point_repr}, + {Py_tp_methods, point_methods}, + {Py_tp_getset, point_accessors}, + {0, NULL}, +}; + +PyType_Spec point_type_spec = { + .name = "tree_sitter.Point", + .basicsize = sizeof(PyTupleObject), + .itemsize = 0, + .flags = Py_TPFLAGS_DEFAULT, + .slots = point_type_slots, +}; diff --git a/tree_sitter/binding/range.c b/tree_sitter/binding/range.c index a92ec017..c315d5c0 100644 --- a/tree_sitter/binding/range.c +++ b/tree_sitter/binding/range.c @@ -1,5 +1,7 @@ #include "types.h" +PyObject *point_new_internal(ModuleState *state, TSPoint point); + int range_init(Range *self, PyObject *args, PyObject *kwargs) { uint32_t start_row, start_col, end_row, end_col, start_byte, end_byte; char *keywords[] = { @@ -98,11 +100,11 @@ PyObject *range_compare(Range *self, PyObject *other, int op) { } PyObject *range_get_start_point(Range *self, void *Py_UNUSED(payload)) { - return POINT_NEW(GET_MODULE_STATE(self), self->range.start_point); + return point_new_internal(GET_MODULE_STATE(self), self->range.start_point); } PyObject *range_get_end_point(Range *self, void *Py_UNUSED(payload)) { - return POINT_NEW(GET_MODULE_STATE(self), self->range.end_point); + return point_new_internal(GET_MODULE_STATE(self), self->range.end_point); } PyObject *range_get_start_byte(Range *self, void *Py_UNUSED(payload)) { @@ -113,6 +115,52 @@ PyObject *range_get_end_byte(Range *self, void *Py_UNUSED(payload)) { return PyLong_FromUnsignedLong(self->range.end_byte); } +PyObject *range_edit(Range *self, PyObject *args, PyObject *kwargs) { + uint32_t start_byte, start_row, start_column; + uint32_t old_end_byte, old_end_row, old_end_column; + uint32_t new_end_byte, new_end_row, new_end_column; + char *keywords[] = { + "start_byte", "old_end_byte", "new_end_byte", 
"start_point", + "old_end_point", "new_end_point", NULL, + }; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "III(II)(II)(II):edit", keywords, &start_byte, + &old_end_byte, &new_end_byte, &start_row, &start_column, + &old_end_row, &old_end_column, &new_end_row, + &new_end_column)) { + return NULL; + } + + TSInputEdit edit = { + .start_byte = start_byte, + .old_end_byte = old_end_byte, + .new_end_byte = new_end_byte, + .start_point = {start_row, start_column}, + .old_end_point = {old_end_row, old_end_column}, + .new_end_point = {new_end_row, new_end_column}, + }; + + ts_range_edit(&self->range, &edit); + + Py_RETURN_NONE; +} + +PyDoc_STRVAR(range_edit_doc, + "edit(self, /, start_byte, old_end_byte, new_end_byte, start_point, " + "old_end_point, new_end_point)\n--\n\n" + "Edit this range to keep it in-sync with source code that has been edited." DOC_TIP + "This is useful for editing ranges without requiring a tree or node instance."); + +static PyMethodDef range_methods[] = { + { + .ml_name = "edit", + .ml_meth = (PyCFunction)range_edit, + .ml_flags = METH_KEYWORDS | METH_VARARGS, + .ml_doc = range_edit_doc, + }, + {NULL}, +}; + static PyGetSetDef range_accessors[] = { {"start_point", (getter)range_get_start_point, NULL, PyDoc_STR("The start point."), NULL}, {"start_byte", (getter)range_get_start_byte, NULL, PyDoc_STR("The start byte."), NULL}, @@ -129,6 +177,7 @@ static PyType_Slot range_type_slots[] = { {Py_tp_repr, range_repr}, {Py_tp_hash, range_hash}, {Py_tp_richcompare, range_compare}, + {Py_tp_methods, range_methods}, {Py_tp_getset, range_accessors}, {0, NULL}, }; diff --git a/tree_sitter/binding/tree.c b/tree_sitter/binding/tree.c index 83618c08..bcfd91ee 100644 --- a/tree_sitter/binding/tree.c +++ b/tree_sitter/binding/tree.c @@ -54,24 +54,27 @@ PyObject *tree_edit(Tree *self, PyObject *args, PyObject *kwargs) { "old_end_point", "new_end_point", NULL, }; - int ok = PyArg_ParseTupleAndKeywords( - args, kwargs, "III(II)(II)(II):edit", keywords, 
&start_byte, &old_end_byte, &new_end_byte, - &start_row, &start_column, &old_end_row, &old_end_column, &new_end_row, &new_end_column); - - if (ok) { - TSInputEdit edit = { - .start_byte = start_byte, - .old_end_byte = old_end_byte, - .new_end_byte = new_end_byte, - .start_point = {start_row, start_column}, - .old_end_point = {old_end_row, old_end_column}, - .new_end_point = {new_end_row, new_end_column}, - }; - ts_tree_edit(self->tree, &edit); - Py_XDECREF(self->source); - self->source = Py_None; - Py_INCREF(self->source); + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "III(II)(II)(II):edit", keywords, &start_byte, + &old_end_byte, &new_end_byte, &start_row, &start_column, + &old_end_row, &old_end_column, &new_end_row, + &new_end_column)) { + return NULL; } + + TSInputEdit edit = { + .start_byte = start_byte, + .old_end_byte = old_end_byte, + .new_end_byte = new_end_byte, + .start_point = {start_row, start_column}, + .old_end_point = {old_end_row, old_end_column}, + .new_end_point = {new_end_row, new_end_column}, + }; + + ts_tree_edit(self->tree, &edit); + + Py_XDECREF(self->source); + self->source = Py_None; + Py_INCREF(self->source); Py_RETURN_NONE; } diff --git a/tree_sitter/binding/types.h b/tree_sitter/binding/types.h index 86ea35f0..e353730d 100644 --- a/tree_sitter/binding/types.h +++ b/tree_sitter/binding/types.h @@ -76,7 +76,7 @@ typedef struct { PyObject_HEAD PyObject *predicate; PyObject *arguments; - uint32_t pattern_index; + uint32_t pattern_index; } QueryPredicateGeneric; typedef struct { @@ -134,9 +134,6 @@ typedef struct { #define IS_INSTANCE(obj, type_name) IS_INSTANCE_OF(obj, GET_MODULE_STATE(self)->type_name) -#define POINT_NEW(state, point) \ - PyObject_CallFunction((PyObject *)(state)->point_type, "II", (point).row, (point).column) - #define DEPRECATE(msg) PyErr_WarnEx(PyExc_DeprecationWarning, msg, 1) #define REPLACE(old, new) DEPRECATE(old " is deprecated. 
Use " new " instead.") From 91aa48a78fb55067e905e18e2589960e8de4710b Mon Sep 17 00:00:00 2001 From: ObserverOfTime Date: Fri, 19 Sep 2025 21:37:36 +0300 Subject: [PATCH 5/6] feat: add version string --- docs/index.rst | 4 ++++ pyproject.toml | 8 ++++---- setup.py | 9 ++++++++- tree_sitter/__init__.py | 2 ++ tree_sitter/__init__.pyi | 2 ++ tree_sitter/binding/module.c | 1 + 6 files changed, 21 insertions(+), 5 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index f85972f3..07441c8f 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -21,6 +21,10 @@ Constants The earliest ABI version that is supported by the current version of the library. +.. autodata:: tree_sitter.__version__ + + The version of the tree-sitter package. + Classes ------- diff --git a/pyproject.toml b/pyproject.toml index a485a75b..55c578f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,3 @@ -[build-system] -requires = ["setuptools>=43"] -build-backend = "setuptools.build_meta" - [project] name = "tree-sitter" version = "0.25.2" @@ -45,6 +41,10 @@ tests = [ "tree-sitter-rust==0.24.0", ] +[build-system] +requires = ["setuptools>=43"] +build-backend = "setuptools.build_meta" + [tool.ruff] target-version = "py310" line-length = 100 diff --git a/setup.py b/setup.py index a6712dfb..4569f6b6 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,13 @@ +from pathlib import PurePath as Path from platform import machine from setuptools import Extension, setup # type: ignore -from setuptools.command.build_ext import build_ext +from setuptools.command.build_ext import build_ext # type: ignore + +with open(Path(__file__).with_name("pyproject.toml")) as f: + next(f) # skip [project] + next(f) # skip name = "tree-sitter" + version = next(f).replace("version = ", "", 1) class BuildExt(build_ext): @@ -58,6 +64,7 @@ def build_extension(self, ext: Extension): ("_DEFAULT_SOURCE", None), ("PY_SSIZE_T_CLEAN", None), ("TREE_SITTER_HIDE_SYMBOLS", None), + ("PY_TS_VERSION", version), ], ) ], diff --git 
a/tree_sitter/__init__.py b/tree_sitter/__init__.py index 44b3c7da..bcd147d6 100644 --- a/tree_sitter/__init__.py +++ b/tree_sitter/__init__.py @@ -17,6 +17,7 @@ TreeCursor, LANGUAGE_VERSION, MIN_COMPATIBLE_LANGUAGE_VERSION, + __version__ ) LogType.__doc__ = "The type of a log message." @@ -64,4 +65,5 @@ def __call__(self, predicate, args, pattern_index, captures): "TreeCursor", "LANGUAGE_VERSION", "MIN_COMPATIBLE_LANGUAGE_VERSION", + "__version__" ] diff --git a/tree_sitter/__init__.pyi b/tree_sitter/__init__.pyi index 10242a90..982b7bb0 100644 --- a/tree_sitter/__init__.pyi +++ b/tree_sitter/__init__.pyi @@ -403,3 +403,5 @@ class Range: LANGUAGE_VERSION: Final[int] MIN_COMPATIBLE_LANGUAGE_VERSION: Final[int] + +__version__: Final[str] diff --git a/tree_sitter/binding/module.c b/tree_sitter/binding/module.c index 4f01f683..e058da77 100644 --- a/tree_sitter/binding/module.c +++ b/tree_sitter/binding/module.c @@ -145,6 +145,7 @@ PyMODINIT_FUNC PyInit__binding(void) { PyModule_AddIntConstant(module, "LANGUAGE_VERSION", TREE_SITTER_LANGUAGE_VERSION); PyModule_AddIntConstant(module, "MIN_COMPATIBLE_LANGUAGE_VERSION", TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION); + PyModule_AddStringConstant(module, "__version__", PY_TS_VERSION); #ifdef Py_GIL_DISABLED PyUnstable_Module_SetGIL(module, Py_MOD_GIL_USED); From 18e5c3f7ab5dbdb9219798a20d12183222ef1440 Mon Sep 17 00:00:00 2001 From: ObserverOfTime Date: Sun, 14 Dec 2025 19:20:52 +0200 Subject: [PATCH 6/6] refactor: prefer unsigned long over size_t --- tree_sitter/binding/language.c | 7 ++++--- tree_sitter/binding/node.c | 23 ++++++++++------------- tree_sitter/binding/query.c | 17 +++++++++-------- tree_sitter/binding/query_cursor.c | 6 +++--- tree_sitter/binding/query_predicates.c | 9 ++++----- tree_sitter/binding/range.c | 10 +++++----- 6 files changed, 35 insertions(+), 37 deletions(-) diff --git a/tree_sitter/binding/language.c b/tree_sitter/binding/language.c index 03ff0099..d28a7f2e 100644 --- 
a/tree_sitter/binding/language.c
+++ b/tree_sitter/binding/language.c
@@ -9,7 +9,7 @@ int language_init(Language *self, PyObject *args, PyObject *Py_UNUSED(kwargs)) {
     if (PyCapsule_CheckExact(language)) {
         self->language = PyCapsule_GetPointer(language, "tree_sitter.Language");
     } else {
-        Py_uintptr_t language_id = PyLong_AsSize_t(language);
+        Py_uintptr_t language_id = (Py_uintptr_t)PyLong_AsVoidPtr(language); // pointer-sized
         if (language_id == 0 || (language_id % sizeof(TSLanguage *)) != 0) {
             if (!PyErr_Occurred()) {
                 PyErr_SetString(PyExc_ValueError, "invalid language ID");
@@ -143,7 +143,7 @@ PyObject *language_id_for_node_kind(Language *self, PyObject *args) {
     if (!PyArg_ParseTuple(args, "s#p:id_for_node_kind", &kind, &length, &named)) {
         return NULL;
     }
-    TSSymbol symbol = ts_language_symbol_for_name(self->language, kind, length, named);
+    TSSymbol symbol = ts_language_symbol_for_name(self->language, kind, (uint32_t)length, named);
     if (symbol == 0) {
         Py_RETURN_NONE;
     }
@@ -195,7 +195,8 @@ PyObject *language_field_id_for_name(Language *self, PyObject *args) {
     if (!PyArg_ParseTuple(args, "s#:field_id_for_name", &field_name, &length)) {
         return NULL;
     }
-    TSFieldId field_id = ts_language_field_id_for_name(self->language, field_name, length);
+    TSFieldId field_id =
+        ts_language_field_id_for_name(self->language, field_name, (uint32_t)length);
     if (field_id == 0) {
         Py_RETURN_NONE;
     }
diff --git a/tree_sitter/binding/node.c b/tree_sitter/binding/node.c
index 04a5d34d..7849cbaf 100644
--- a/tree_sitter/binding/node.c
+++ b/tree_sitter/binding/node.c
@@ -327,7 +327,7 @@ PyObject *node_child_with_descendant(Node *self, PyObject *args) {
         return NULL;
     }
 
-    TSNode child = ts_node_child_with_descendant(self->node, ((Node *) descendant)->node);
+    TSNode child = ts_node_child_with_descendant(self->node, ((Node *)descendant)->node);
     if (ts_node_is_null(child)) {
         Py_RETURN_NONE;
     }
@@ -387,11 +387,11 @@ PyObject *node_get_is_missing(Node *self, void *Py_UNUSED(payload)) {
 }
 
 PyObject *node_get_start_byte(Node
*self, void *Py_UNUSED(payload)) { - return PyLong_FromSize_t((size_t)ts_node_start_byte(self->node)); + return PyLong_FromUnsignedLong(ts_node_start_byte(self->node)); } PyObject *node_get_end_byte(Node *self, void *Py_UNUSED(payload)) { - return PyLong_FromSize_t((size_t)ts_node_end_byte(self->node)); + return PyLong_FromUnsignedLong(ts_node_end_byte(self->node)); } PyObject *node_get_byte_range(Node *self, void *Py_UNUSED(payload)) { @@ -560,13 +560,13 @@ PyObject *node_get_text(Node *self, void *Py_UNUSED(payload)) { } PyObject *result = NULL; - size_t start_offset = (size_t)ts_node_start_byte(self->node); - size_t end_offset = (size_t)ts_node_end_byte(self->node); + uint32_t start_offset = ts_node_start_byte(self->node), + end_offset = ts_node_end_byte(self->node); // Case 1: source is a byte buffer if (!PyCallable_Check(tree->source)) { - PyObject *start_byte = PyLong_FromSize_t(start_offset), - *end_byte = PyLong_FromSize_t(end_offset); + PyObject *start_byte = PyLong_FromUnsignedLong(start_offset), + *end_byte = PyLong_FromUnsignedLong(end_offset); PyObject *slice = PySlice_New(start_byte, end_byte, NULL); Py_XDECREF(start_byte); Py_XDECREF(end_byte); @@ -599,7 +599,6 @@ PyObject *node_get_text(Node *self, void *Py_UNUSED(payload)) { TSPoint current_point = start_point; for (size_t current_offset = start_offset; current_offset < end_offset;) { - // Form arguments to callable. PyObject *byte_offset_obj = PyLong_FromSize_t(current_offset); if (!byte_offset_obj) { Py_DECREF(collected_bytes); @@ -616,7 +615,6 @@ PyObject *node_get_text(Node *self, void *Py_UNUSED(payload)) { Py_XDECREF(byte_offset_obj); Py_XDECREF(position_obj); - // Call callable. 
PyObject *rv = PyObject_Call(tree->source, args, NULL); Py_XDECREF(args); @@ -636,10 +634,9 @@ PyObject *node_get_text(Node *self, void *Py_UNUSED(payload)) { } collected_bytes = new_collected_bytes; - // Update current_point and current_offset - Py_ssize_t bytes_read = PyBytes_Size(rv); - const char *rv_str = PyBytes_AsString(rv); // Retrieve the string pointer once - for (Py_ssize_t i = 0; i < bytes_read; ++i) { + size_t bytes_read = (size_t)PyBytes_Size(rv); + const char *rv_str = PyBytes_AsString(rv); + for (size_t i = 0; i < bytes_read; ++i) { if (rv_str[i] == '\n') { ++current_point.row; current_point.column = 0; diff --git a/tree_sitter/binding/query.c b/tree_sitter/binding/query.c index 80cb2cdc..e4bf76b7 100644 --- a/tree_sitter/binding/query.c +++ b/tree_sitter/binding/query.c @@ -52,7 +52,8 @@ PyObject *query_new(PyTypeObject *cls, PyObject *args, PyObject *Py_UNUSED(kwarg TSQueryError error_type; PyObject *pattern_predicates = NULL, *pattern_settings = NULL, *pattern_assertions = NULL; TSLanguage *language_id = ((Language *)language_obj)->language; - query->query = ts_query_new(language_id, source, source_len, &error_offset, &error_type); + query->query = + ts_query_new(language_id, source, (uint32_t)source_len, &error_offset, &error_type); query->predicates = NULL; query->settings = NULL; query->assertions = NULL; @@ -437,7 +438,7 @@ PyObject *query_new(PyTypeObject *cls, PyObject *args, PyObject *Py_UNUSED(kwarg PyObject_New(QueryPredicateGeneric, state->query_predicate_generic_type); predicate->predicate = PyUnicode_FromStringAndSize(predicate_name, length); predicate->arguments = PyList_New(predicate_len - 1); - predicate->pattern_index = i; + predicate->pattern_index = i; for (uint32_t k = 1; k < predicate_len; ++k) { PyObject *item; if ((predicate_step + k)->type == TSQueryPredicateStepTypeCapture) { @@ -574,7 +575,7 @@ PyObject *query_disable_capture(Query *self, PyObject *args) { if (!PyArg_ParseTuple(args, "s#:disable_capture", &capture_name, 
&length)) { return NULL; } - ts_query_disable_capture(self->query, capture_name, length); + ts_query_disable_capture(self->query, capture_name, (uint32_t)length); return Py_NewRef(self); } @@ -585,7 +586,7 @@ PyObject *query_start_byte_for_pattern(Query *self, PyObject *args) { } CHECK_INDEX(self->query, pattern_index); start_byte = ts_query_start_byte_for_pattern(self->query, pattern_index); - return PyLong_FromSize_t(start_byte); + return PyLong_FromUnsignedLong(start_byte); } PyObject *query_end_byte_for_pattern(Query *self, PyObject *args) { @@ -595,7 +596,7 @@ PyObject *query_end_byte_for_pattern(Query *self, PyObject *args) { } CHECK_INDEX(self->query, pattern_index); end_byte = ts_query_end_byte_for_pattern(self->query, pattern_index); - return PyLong_FromSize_t(end_byte); + return PyLong_FromUnsignedLong(end_byte); } PyObject *query_is_pattern_rooted(Query *self, PyObject *args) { @@ -627,15 +628,15 @@ PyObject *query_is_pattern_guaranteed_at_step(Query *self, PyObject *args) { } PyObject *query_get_pattern_count(Query *self, void *Py_UNUSED(payload)) { - return PyLong_FromSize_t(ts_query_pattern_count(self->query)); + return PyLong_FromUnsignedLong(ts_query_pattern_count(self->query)); } PyObject *query_get_capture_count(Query *self, void *Py_UNUSED(payload)) { - return PyLong_FromSize_t(ts_query_capture_count(self->query)); + return PyLong_FromUnsignedLong(ts_query_capture_count(self->query)); } PyObject *query_get_string_count(Query *self, void *Py_UNUSED(payload)) { - return PyLong_FromSize_t(ts_query_string_count(self->query)); + return PyLong_FromUnsignedLong(ts_query_string_count(self->query)); } PyDoc_STRVAR(query_disable_pattern_doc, "disable_pattern(self, index)\n--\n\n" diff --git a/tree_sitter/binding/query_cursor.c b/tree_sitter/binding/query_cursor.c index e02be554..4eb66aab 100644 --- a/tree_sitter/binding/query_cursor.c +++ b/tree_sitter/binding/query_cursor.c @@ -157,7 +157,7 @@ PyObject *query_cursor_matches(QueryCursor *self, PyObject 
*args, PyObject *kwar PyList_Append(capture_list, capture_node); Py_XDECREF(capture_node); } - PyObject *pattern_index = PyLong_FromSize_t(match.pattern_index); + PyObject *pattern_index = PyLong_FromUnsignedLong(match.pattern_index); PyObject *tuple_match = PyTuple_Pack(2, pattern_index, captures_for_match); Py_DECREF(pattern_index); Py_DECREF(captures_for_match); @@ -240,7 +240,7 @@ PyObject *query_cursor_captures(QueryCursor *self, PyObject *args, PyObject *kwa } PyObject *query_cursor_get_did_exceed_match_limit(QueryCursor *self, void *Py_UNUSED(payload)) { - return PyLong_FromSize_t(ts_query_cursor_did_exceed_match_limit(self->cursor)); + return PyLong_FromUnsignedLong(ts_query_cursor_did_exceed_match_limit(self->cursor)); } PyObject *query_cursor_get_match_limit(QueryCursor *self, void *Py_UNUSED(payload)) { @@ -258,7 +258,7 @@ int query_cursor_set_match_limit(QueryCursor *self, PyObject *arg, void *Py_UNUS return -1; } - ts_query_cursor_set_match_limit(self->cursor, PyLong_AsSize_t(arg)); + ts_query_cursor_set_match_limit(self->cursor, PyLong_AsUnsignedLong(arg)); return 0; } diff --git a/tree_sitter/binding/query_predicates.c b/tree_sitter/binding/query_predicates.c index 14ce909d..b65ee349 100644 --- a/tree_sitter/binding/query_predicates.c +++ b/tree_sitter/binding/query_predicates.c @@ -32,8 +32,7 @@ static inline PyObject *captures_for_match(ModuleState *state, TSQuery *query, T PyObject *captures = PyDict_New(); for (uint32_t j = 0; j < match->capture_count; ++j) { TSQueryCapture capture = match->captures[j]; - const char *capture_name = - ts_query_capture_name_for_id(query, capture.index, &name_length); + const char *capture_name = ts_query_capture_name_for_id(query, capture.index, &name_length); PyObject *capture_name_obj = PyUnicode_FromStringAndSize(capture_name, name_length); if (capture_name_obj == NULL) { return NULL; @@ -160,9 +159,9 @@ bool query_satisfies_predicates(Query *query, TSQueryMatch match, Tree *tree, Py break; } 
QueryPredicateGeneric *predicate = (QueryPredicateGeneric *)item; - PyObject *result = PyObject_CallFunction(callable, "OOIO", predicate->predicate, - predicate->arguments, - predicate->pattern_index, captures); + PyObject *result = + PyObject_CallFunction(callable, "OOIO", predicate->predicate, predicate->arguments, + predicate->pattern_index, captures); if (result == NULL) { is_satisfied = false; break; diff --git a/tree_sitter/binding/range.c b/tree_sitter/binding/range.c index c315d5c0..aaa264c9 100644 --- a/tree_sitter/binding/range.c +++ b/tree_sitter/binding/range.c @@ -46,21 +46,21 @@ PyObject *range_repr(Range *self) { Py_hash_t range_hash(Range *self) { // FIXME: replace with an efficient integer hashing algorithm - PyObject *row_tuple = PyTuple_Pack(2, PyLong_FromSize_t(self->range.start_point.row), + PyObject *row_tuple = PyTuple_Pack(2, PyLong_FromUnsignedLong(self->range.start_point.row), PyLong_FromLong(self->range.end_point.row)); if (!row_tuple) { return -1; } - PyObject *col_tuple = PyTuple_Pack(2, PyLong_FromSize_t(self->range.start_point.column), - PyLong_FromSize_t(self->range.end_point.column)); + PyObject *col_tuple = PyTuple_Pack(2, PyLong_FromUnsignedLong(self->range.start_point.column), + PyLong_FromUnsignedLong(self->range.end_point.column)); if (!col_tuple) { Py_DECREF(row_tuple); return -1; } - PyObject *bytes_tuple = PyTuple_Pack(2, PyLong_FromSize_t(self->range.start_byte), - PyLong_FromSize_t(self->range.end_byte)); + PyObject *bytes_tuple = PyTuple_Pack(2, PyLong_FromUnsignedLong(self->range.start_byte), + PyLong_FromUnsignedLong(self->range.end_byte)); if (!bytes_tuple) { Py_DECREF(row_tuple); Py_DECREF(col_tuple);