// Copyright (C) 2023-2024  Lightpanda (Selecy SAS)
//
// Francis Bouvier
// Pierre Tachoire
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

const std = @import("std");
const Allocator = std.mem.Allocator;

const log = @import("../log.zig");
const parser = @import("../browser/netsurf.zig");

const AXNode = @This();
const Node = @import("Node.zig");

// Need a custom writer, because we can't just serialize the node as-is.
// The writer emits a flat JSON array: the root node first, then every
// element/text descendant reached by the walker. Each entry references its
// parent and its direct children by id.
pub const Writer = struct {
    root: *const Node,
    registry: *Node.Registry,

    /// Kinds of value an AXValue can carry. The tag name is written verbatim
    /// as the JSON "type" field.
    const AXValuesType = enum(u8) { boolean, tristate, booleanOrUndefined, idref, idrefList, integer, node, nodeList, number, string, computedString, token, tokenList, domRelation, role, internalRole, valueUndefined };

    pub const Opts = struct {};

    /// std.json entry point. Serializes `root` and its descendants.
    /// Any failure is logged and reported as `error.WriteFailed`.
    pub fn jsonStringify(self: *const Writer, w: anytype) error{WriteFailed}!void {
        self.toJSON(self.root, w) catch |err| {
            // The only error our jsonStringify method can return is
            // error.WriteFailed. In other words, our code can't return its
            // own error, we can only return a writer error. Kinda sucks.
            log.err(.cdp, "node toJSON stringify", .{ .err = err });
            return error.WriteFailed;
        };
    }

    /// Writes `node` followed by its descendants as one JSON array.
    /// Only element and text nodes are emitted; a subtree is skipped when
    /// writeNode reports that the children must be skipped.
    fn toJSON(self: *const Writer, node: *const Node, w: anytype) !void {
        try w.beginArray();
        const root = try AXNode.fromNode(node._node);
        if (try self.writeNode(node.id, root, w)) {
            // The root asked to skip its children: nothing else to emit.
            try w.endArray();
            return;
        }

        const walker = Walker{};
        var next: ?*parser.Node = null;
        var skip_children = false;
        while (true) {
            const current = try walker.get_next(node._node, next, .{ .skip_children = skip_children }) orelse break;
            next = current;

            const node_type = parser.nodeType(current);
            if (node_type != .element and node_type != .text) {
                skip_children = true;
                continue;
            }

            // Special case: whether a text node is kept depends on its
            // parent.
            if (node_type == .text) {
                if (parser.nodeParentNode(current)) |p| {
                    if (try ignoreText(p)) {
                        skip_children = true;
                        continue;
                    }
                }
            }

            const n = try self.registry.register(current);
            const axn = try AXNode.fromNode(current);
            skip_children = try self.writeNode(n.id, axn, w);
        }

        try w.endArray();
    }

    /// A typed value, optionally annotated with the source it was computed
    /// from.
    const AXValue = struct {
        type: AXValuesType,
        value: ?union(enum) {
            string: []const u8,
            uint: usize,
            boolean: bool,
        } = null,
        // TODO relatedNodes
        source: ?AXSource = null,
    };

    /// Writes the "sources" field (a one-element array) describing `source`.
    /// Must be called while an object is open on `w`.
    fn writeAXSource(_: *const Writer, source: AXSource, w: anytype) !void {
        try w.objectField("sources");
        try w.beginArray();
        try w.beginObject();
        // attribute, implicit, style, contents, placeholder, relatedElement
        const source_type = switch (source) {
            .aria_labelledby => blk: {
                try w.objectField("attribute");
                try w.write(@tagName(source));
                break :blk "relatedElement";
            },
            .aria_label, .alt, .title, .placeholder, .value => blk: {
                // Not sure if it's correct for the .value case.
                try w.objectField("attribute");
                try w.write(@tagName(source));
                break :blk "attribute";
            },
            // Chrome sends the content AXValue *again* in the source.
            // But it seems useless to me.
            //
            // w.objectField("value");
            // self.writeAXValue(.{ .type = .computedString, .value = value.value }, w);
            .contents => "contents",
            .label_element, .label_wrap => "TODO", // TODO
        };

        try w.objectField("type");
        try w.write(source_type);
        try w.endObject();
        try w.endArray();
    }

    /// Writes `value` as a JSON object: "type", then "value" and "sources"
    /// when present.
    fn writeAXValue(self: *const Writer, value: AXValue, w: anytype) !void {
        try w.beginObject();
        try w.objectField("type");
        try w.write(@tagName(value.type));

        if (value.value) |v| {
            try w.objectField("value");
            switch (v) {
                .uint => |n| try w.write(n),
                .string => |s| try w.write(s),
                .boolean => |b| try w.write(b),
            }
        }

        if (value.source) |source| {
            try self.writeAXSource(source, w);
        }
        try w.endObject();
    }

    /// A named property; the tag name is written verbatim as the JSON "name".
    const AXProperty = struct {
        name: enum(u8) { actions, busy, disabled, editable, focusable, focused, hidden, hiddenRoot, invalid, keyshortcuts, settable, roledescription, live, atomic, relevant, root, autocomplete, hasPopup, level, multiselectable, orientation, multiline, readonly, required, valuemin, valuemax, valuetext, checked, expanded, modal, pressed, selected, activedescendant, controls, describedby, details, errormessage, flowto, labelledby, owns, url, activeFullscreenElement, activeModalDialog, activeAriaModalDialog, ariaHiddenElement, ariaHiddenSubtree, emptyAlt, emptyText, inertElement, inertSubtree, labelContainer, labelFor, notRendered, notVisible, presentationalRole, probablyPresentational, inactiveCarouselTabContent, uninteresting },
        value: AXValue,
    };

    /// Writes the property objects of `axnode` into the currently open array.
    /// Documents get `url` and `focusable`; elements get tag-specific
    /// properties; text nodes get none. Any other node type is rejected with
    /// `error.InvalidTag`.
    fn writeAXProperties(self: *const Writer, axnode: AXNode, w: anytype) !void {
        const node = axnode._node;
        switch (parser.nodeType(node)) {
            .document => {
                const uri = try parser.documentGetDocumentURI(@ptrCast(node));
                try self.writeUrl(uri, w);
                try self.writeFocusable(w);
                return;
            },
            .element => {},
            // A text node is not an element: it must not be cast to one, and
            // it has no tag-specific property to report.
            .text => return,
            else => {
                log.debug(.cdp, "invalid tag", .{ .node_type = parser.nodeType(node) });
                return error.InvalidTag;
            },
        }

        const elt: *parser.Element = @ptrCast(node);

        const tag = try parser.elementTag(elt);
        switch (tag) {
            .h1 => try self.writeLevel(1, w),
            .h2 => try self.writeLevel(2, w),
            .h3 => try self.writeLevel(3, w),
            .h4 => try self.writeLevel(4, w),
            .h5 => try self.writeLevel(5, w),
            .h6 => try self.writeLevel(6, w),
            .img => {
                // An image's URL lives in its `src` attribute.
                // TODO make uri absolute
                if (try parser.elementGetAttribute(elt, "src")) |uri| {
                    try self.writeUrl(uri, w);
                }
            },
            .a => {
                // TODO make uri absolute
                if (try parser.elementGetAttribute(elt, "href")) |uri| {
                    try self.writeUrl(uri, w);
                }
                try self.writeFocusable(w);
            },
            else => {},
        }
    }

    /// Writes a `level` property holding the integer `level`.
    fn writeLevel(self: *const Writer, level: usize, w: anytype) !void {
        try self.writeAXProperty(.{ .name = .level, .value = .{ .type = .integer, .value = .{ .uint = level } } }, w);
    }

    /// Writes a `url` property holding the string `uri`.
    fn writeUrl(self: *const Writer, uri: []const u8, w: anytype) !void {
        try self.writeAXProperty(.{ .name = .url, .value = .{ .type = .string, .value = .{ .string = uri } } }, w);
    }

    /// Writes a `focusable` property set to true.
    fn writeFocusable(self: *const Writer, w: anytype) !void {
        try self.writeAXProperty(.{ .name = .focusable, .value = .{ .type = .booleanOrUndefined, .value = .{ .boolean = true } } }, w);
    }

    /// Writes `value` as a JSON object with "name" and "value" fields.
    fn writeAXProperty(self: *const Writer, value: AXProperty, w: anytype) !void {
        try w.beginObject();
        try w.objectField("name");
        try w.write(@tagName(value.name));
        try w.objectField("value");
        try self.writeAXValue(value.value, w);
        try w.endObject();
    }

    /// Writes one node as a JSON object. Returns true if the node's children
    /// must be skipped by the caller.
    fn writeNode(self: *const Writer, id: u32, axn: AXNode, w: anytype) !bool {
        try w.beginObject();

        try w.objectField("nodeId");
        try w.write(id);

        try w.objectField("backendDOMNodeId");
        try w.write(id);

        try w.objectField("role");
        try self.writeAXValue(.{ .type = .role, .value = .{ .string = try axn.getRole() } }, w);

        const ignore = try axn.isIgnore();
        try w.objectField("ignored");
        try w.write(ignore);

        if (ignore) {
            // Ignored nodes carry the reasons instead of a name and
            // properties. The CDP field name is camelCase.
            try w.objectField("ignoredReasons");
            try w.beginArray();
            try self.writeAXProperty(.{ .name = .uninteresting, .value = .{ .type = .boolean, .value = .{ .boolean = true } } }, w);
            try w.endArray();
        } else {
            // Name
            try w.objectField("name");
            try w.beginObject();
            try w.objectField("type");
            try w.write(@tagName(AXValuesType.computedString));
            try w.objectField("value");
            // writeName writes the value itself and reports where it came
            // from, if known.
            const source = try axn.writeName(w);
            if (source) |s| {
                try self.writeAXSource(s, w);
            }
            try w.endObject();

            // Properties
            try w.objectField("properties");
            try w.beginArray();
            try self.writeAXProperties(axn, w);
            try w.endArray();
        }

        const n = axn._node;

        // Parent
        if (parser.nodeParentNode(n)) |p| {
            const parent_node = try self.registry.register(p);
            try w.objectField("parentId");
            try w.write(parent_node.id);
        }

        // Children
        const skip_children = try axn.ignoreChildren();
        const skip_text = try ignoreText(n);

        try w.objectField("childIds");
        try w.beginArray();
        if (!skip_children) {
            const child_nodes = try parser.nodeGetChildNodes(n);
            const child_count = parser.nodeListLength(child_nodes);

            for (0..child_count) |i| {
                const child = parser.nodeListItem(child_nodes, @intCast(i)) orelse break;

                // Keep elements always, and text nodes unless this node
                // ignores its text.
                const child_type = parser.nodeType(child);
                const keep = child_type == .element or (child_type == .text and !skip_text);
                if (!keep) continue;

                const child_node = try self.registry.register(child);
                try w.write(child_node.id);
            }
        }
        try w.endArray();

        try w.endObject();

        return skip_children;
    }
};

/// Accessibility roles this implementation can report.
pub const AXRole = enum(u8) {
    none,
    article,
    banner,
    blockquote,
    button,
    caption,
    cell,
    checkbox,
    code,
    columnheader,
    combobox,
    complementary,
    contentinfo,
    definition,
    deletion,
    dialog,
    document,
    emphasis,
    figure,
    form,
    group,
    heading,
    image,
    insertion,
    link,
    list,
    listbox,
    listitem,
    main,
    marquee,
    meter,
    navigation,
    option,
    paragraph,
    presentation,
    progressbar,
    radio,
    region,
    row,
    rowgroup,
    rowheader,
    searchbox,
    separator,
    slider,
    spinbutton,
    status,
    strong,
    subscript,
    superscript,
    table,
    term,
    textbox,
    time,
    RootWebArea,
    LineBreak,
    StaticText,

    /// Returns the implicit role of `node`, derived from its node type and,
    /// for elements, from its tag and a few attributes.
    /// Node types other than document, element and text are rejected with
    /// `error.InvalidTag`.
    fn fromNode(node: *parser.Node) !AXRole {
        switch (parser.nodeType(node)) {
            .document => return .RootWebArea, // Chrome specific.
            .element => {},
            .text => return .StaticText,
            else => {
                log.debug(.cdp, "invalid tag", .{ .node_type = parser.nodeType(node) });
                return error.InvalidTag;
            },
        }

        const elt: *parser.Element = @ptrCast(node);

        const tag = try parser.elementTag(elt);
        return switch (tag) {
            // Navigation & Structure
            .nav => .navigation,
            .main => .main,
            .aside => .complementary,
            // TODO conditions:
            // .banner Not descendant of article, aside, main, nav, section
            // (none) When descendant of article, aside, main, nav, section
            .header => .banner,
            // TODO conditions:
            // contentinfo Not descendant of article, aside, main, nav, section
            // (none) When descendant of article, aside, main, nav, section
            .footer => .contentinfo,
            // TODO conditions:
            // region Has accessible name (aria-label, aria-labelledby, or title)
            // (none) No accessible name
            .section => .region,
            .article, .hgroup => .article,
            .address => .group,

            // Headings
            .h1, .h2, .h3, .h4, .h5, .h6 => .heading,
            .ul, .ol, .menu => .list,
            .li => .listitem,
            .dt => .term,
            .dd => .definition,

            // Forms & Inputs
            // TODO conditions:
            // form Has accessible name
            // (none) No accessible name
            .form => .form,
            .input => {
                const input_type = try parser.inputGetType(@ptrCast(elt));
                // Bucket by length first so only a few case-insensitive
                // comparisons run per input. Anything not matched below
                // falls back to textbox.
                switch (input_type.len) {
                    3 => {
                        // tel defaults to textbox
                        // url defaults to textbox
                    },
                    4 => {
                        if (std.ascii.eqlIgnoreCase(input_type, "date")) {
                            return .none;
                        }
                        if (std.ascii.eqlIgnoreCase(input_type, "file")) {
                            return .none;
                        }
                        if (std.ascii.eqlIgnoreCase(input_type, "time")) {
                            return .none;
                        }
                        if (std.ascii.eqlIgnoreCase(input_type, "week")) {
                            return .none;
                        }
                        // text defaults to textbox
                    },
                    5 => {
                        if (std.ascii.eqlIgnoreCase(input_type, "color")) {
                            return .none;
                        }
                        if (std.ascii.eqlIgnoreCase(input_type, "image")) {
                            return .button;
                        }
                        if (std.ascii.eqlIgnoreCase(input_type, "month")) {
                            return .none;
                        }
                        if (std.ascii.eqlIgnoreCase(input_type, "radio")) {
                            return .radio;
                        }
                        if (std.ascii.eqlIgnoreCase(input_type, "range")) {
                            return .slider;
                        }
                        if (std.ascii.eqlIgnoreCase(input_type, "reset")) {
                            return .button;
                        }
                        // email defaults to textbox.
                    },
                    6 => {
                        if (std.ascii.eqlIgnoreCase(input_type, "button")) {
                            return .button;
                        }
                        if (std.ascii.eqlIgnoreCase(input_type, "hidden")) {
                            return .none;
                        }
                        if (std.ascii.eqlIgnoreCase(input_type, "number")) {
                            return .spinbutton;
                        }
                        if (std.ascii.eqlIgnoreCase(input_type, "search")) {
                            return .searchbox;
                        }
                        if (std.ascii.eqlIgnoreCase(input_type, "submit")) {
                            return .button;
                        }
                    },
                    8 => {
                        if (std.ascii.eqlIgnoreCase(input_type, "checkbox")) {
                            return .checkbox;
                        }
                        if (std.ascii.eqlIgnoreCase(input_type, "password")) {
                            return .none;
                        }
                    },
                    14 => {
                        if (std.ascii.eqlIgnoreCase(input_type, "datetime-local")) {
                            return .none;
                        }
                    },
                    else => {},
                }
                return .textbox;
            },
            .textarea => .textbox,
            .select => {
                // A select is a listbox when it is `multiple` or has a
                // `size` other than "1"; otherwise it is a combobox.
                if (try getAttribute(node, "multiple") != null) {
                    return .listbox;
                }
                if (try getAttribute(node, "size")) |size| {
                    if (!std.ascii.eqlIgnoreCase(size, "1")) {
                        return .listbox;
                    }
                }
                return .combobox;
            },
            .option => .option,
            .optgroup, .fieldset => .group,
            .button => .button,
            .output => .status,
            .progress => .progressbar,
            .meter => .meter,
            .datalist => .listbox,

            // Interactive Elements
            .a, .area => {
                // Without href, the element is not a link.
                if (try getAttribute(node, "href") == null) {
                    return .none;
                }

                return .link;
            },
            .details => .group,
            .summary => .button,
            .dialog => .dialog,

            // Media
            .img => .image,
            .figure => .figure,

            // Tables
            .table => .table,
            .caption => .caption,
            .thead, .tbody, .tfoot => .rowgroup,
            .tr => .row,
            .th => {
                if (try getAttribute(node, "scope")) |scope| {
                    if (std.ascii.eqlIgnoreCase(scope, "row")) {
                        return .rowheader;
                    }
                }
                return .columnheader;
            },
            .td => .cell,

            // Text & Semantics
            .p => .paragraph,
            .hr => .separator,
            .blockquote => .blockquote,
            .code => .code,
            .em => .emphasis,
            .strong => .strong,
            .s, .del => .deletion,
            .ins => .insertion,
            .sub => .subscript,
            .sup => .superscript,
            .time => .time,
            .dfn => .term,

            // Document Structure
            .html => .none,
            .body => .none,

            // Deprecated/Obsolete Elements
            .marquee => .marquee,

            .br => .LineBreak,

            else => .none,
        };
    }
};

_node: *parser.Node,
role_attr: ?[]const u8,

/// Wraps `node`, capturing the value of its `role` attribute as returned by
/// getAttribute.
pub fn fromNode(node: *parser.Node) !AXNode {
    return .{
        ._node = node,
        .role_attr = try getAttribute(node, "role"),
    };
}

const AXSource = enum(u8) {
    aria_labelledby,
    aria_label,
    label_element, //