From 87ed22c60c0fcdc6c15462923b064c9a60d43f5e Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 7 Dec 2025 19:13:12 +0000 Subject: [PATCH 1/3] feat(go-opencode): Add WebFetch tool for fetching web content Port the WebFetch tool from TypeScript to Go OpenCode. The tool fetches content from URLs and returns it in the requested format (text, markdown, or html). Key features: - URL validation (must start with http:// or https://) - Configurable timeout (max 120 seconds, default 30) - Response size limit (5MB max) - HTML to Markdown conversion using html-to-markdown library - HTML to plain text extraction using goquery (strips scripts, styles) - Accept headers based on requested format - User-Agent spoofing to avoid blocking Dependencies added: - github.com/JohannesKaufmann/html-to-markdown v1.6.0 - github.com/PuerkitoBio/goquery v1.10.0 --- go-opencode/go.mod | 9 +- go-opencode/go.sum | 63 +++ go-opencode/internal/tool/registry.go | 1 + go-opencode/internal/tool/webfetch.go | 239 ++++++++++ go-opencode/internal/tool/webfetch_test.go | 496 +++++++++++++++++++++ 5 files changed, 805 insertions(+), 3 deletions(-) create mode 100644 go-opencode/internal/tool/webfetch.go create mode 100644 go-opencode/internal/tool/webfetch_test.go diff --git a/go-opencode/go.mod b/go-opencode/go.mod index 727e9eb3b1f..2135dde44cc 100644 --- a/go-opencode/go.mod +++ b/go-opencode/go.mod @@ -1,6 +1,6 @@ module github.com/opencode-ai/opencode -go 1.25 +go 1.24 require ( // Eino LLM Framework @@ -21,11 +21,14 @@ require ( ) require ( + github.com/JohannesKaufmann/html-to-markdown v1.6.0 + github.com/PuerkitoBio/goquery v1.10.0 github.com/ThreeDotsLabs/watermill v1.5.1 github.com/agnivade/levenshtein v1.2.1 github.com/bmatcuk/doublestar/v4 v4.9.1 github.com/cenkalti/backoff/v4 v4.3.0 github.com/cloudwego/eino-ext/components/model/ark v0.1.50 + github.com/fsnotify/fsnotify v1.9.0 github.com/joho/godotenv v1.5.1 github.com/mark3labs/mcp-go v0.43.1 github.com/modelcontextprotocol/go-sdk v1.1.0 @@ 
-41,6 +44,7 @@ require ( require ( github.com/Masterminds/semver/v3 v3.4.0 // indirect + github.com/andybalholm/cascadia v1.3.2 // indirect github.com/anthropics/anthropic-sdk-go v1.19.0 // indirect github.com/aws/aws-sdk-go-v2 v1.33.0 // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.3 // indirect @@ -67,7 +71,6 @@ require ( github.com/dustin/go-humanize v1.0.1 // indirect github.com/eino-contrib/jsonschema v1.0.2 // indirect github.com/evanphx/json-patch v0.5.2 // indirect - github.com/fsnotify/fsnotify v1.9.0 // indirect github.com/go-logr/logr v1.4.3 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/google/go-cmp v0.7.0 // indirect @@ -122,4 +125,4 @@ require ( replace github.com/sst/opencode-sdk-go => ../packages/sdk/go -replace github.com/cloudwego/eino-ext/components/model/claude => /Users/joohwi.lee/repos/cloudwego/eino-ext/components/model/claude +// replace github.com/cloudwego/eino-ext/components/model/claude => /Users/joohwi.lee/repos/cloudwego/eino-ext/components/model/claude diff --git a/go-opencode/go.sum b/go-opencode/go.sum index 068b140ed9c..50960675278 100644 --- a/go-opencode/go.sum +++ b/go-opencode/go.sum @@ -1,12 +1,19 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/JohannesKaufmann/html-to-markdown v1.6.0 h1:04VXMiE50YYfCfLboJCLcgqF5x+rHJnb1ssNmqpLH/k= +github.com/JohannesKaufmann/html-to-markdown v1.6.0/go.mod h1:NUI78lGg/a7vpEJTz/0uOcYMaibytE4BUOQS8k78yPQ= github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= +github.com/PuerkitoBio/goquery v1.9.2/go.mod h1:GHPCaP0ODyyxqcNoFGYlAprUFH81NuRPd0GX3Zu2Mvk= +github.com/PuerkitoBio/goquery v1.10.0 h1:6fiXdLuUvYs2OJSvNRqlNPoBm6YABE226xrbavY5Wv4= +github.com/PuerkitoBio/goquery 
v1.10.0/go.mod h1:TjZZl68Q3eGHNBA8CWaxAN7rOU1EbDz3CWuolcO5Yu4= github.com/ThreeDotsLabs/watermill v1.5.1 h1:t5xMivyf9tpmU3iozPqyrCZXHvoV1XQDfihas4sV0fY= github.com/ThreeDotsLabs/watermill v1.5.1/go.mod h1:Uop10dA3VeJWsSvis9qO3vbVY892LARrKAdki6WtXS4= github.com/agnivade/levenshtein v1.2.1 h1:EHBY3UOn1gwdy/VbFwgo4cxecRznFk7fKWN1KOX7eoM= github.com/agnivade/levenshtein v1.2.1/go.mod h1:QVVI16kDrtSuwcpd0p1+xMC6Z/VfhtCyDIjcwga4/DU= github.com/airbrake/gobrake v3.6.1+incompatible/go.mod h1:wM4gu3Cn0W0K7GUuVWnlXZU11AGBXMILnrdOU8Kn00o= +github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss= +github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU= github.com/anthropics/anthropic-sdk-go v1.19.0 h1:mO6E+ffSzLRvR/YUH9KJC0uGw0uV8GjISIuzem//3KE= github.com/anthropics/anthropic-sdk-go v1.19.0/go.mod h1:WTz31rIUHUHqai2UslPpw5CwXrQP3geYBioRV4WOLvE= github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q= @@ -69,6 +76,8 @@ github.com/cloudwego/eino v0.6.0 h1:pobGKMOfcQHVNhD9UT/HrvO0eYG6FC2ML/NKY2Eb9+Q= github.com/cloudwego/eino v0.6.0/go.mod h1:JNapfU+QUrFFpboNDrNOFvmz0m9wjBFHHCr77RH6a50= github.com/cloudwego/eino-ext/components/model/ark v0.1.50 h1:1jM9hC9L9Y5E67UqKP7HEJM1PQwy84hELFwRaWjXM0w= github.com/cloudwego/eino-ext/components/model/ark v0.1.50/go.mod h1:dC4wNeUdnjo4s/1r+YG7fMQcnfQ3bOFWw8Penh86vOI= +github.com/cloudwego/eino-ext/components/model/claude v0.1.10 h1:UgYDk+4qBg6G0MQOJ5qYjFkoe1oLkyqONhaoj+v7RW4= +github.com/cloudwego/eino-ext/components/model/claude v0.1.10/go.mod h1:lPQZg8LudfhI4B9Apr9txCe5wbCY/9d/IB7Vv/sdcU8= github.com/cloudwego/eino-ext/components/model/openai v0.1.5 h1:+yvGbTPw93li9GSmdm6Rix88Yy8AXg5NNBcRbWx3CQU= github.com/cloudwego/eino-ext/components/model/openai v0.1.5/go.mod h1:IPVYMFoZcuHeVEsDTGN6SZjvue0xr1iZFhdpq1SBWdQ= github.com/cloudwego/eino-ext/libs/acl/openai v0.1.2 h1:r9Id2wzJ05PoHl+Km7jQgNMgciaZI93TVnUYso89esM= @@ -223,6 
+232,7 @@ github.com/pborman/getopt v0.0.0-20170112200414-7148bc3a4c30/go.mod h1:85jBQOZwp github.com/pelletier/go-toml/v2 v2.0.9 h1:uH2qQXheeefCCkuBBSLi7jCiSmj3VRh2+Goq2N7Xxu0= github.com/pelletier/go-toml/v2 v2.0.9/go.mod h1:tJU2Z3ZkXwnxa4DPO899bsyIoywizdUvyaeZurnPPDc= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= @@ -235,6 +245,10 @@ github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0= github.com/rs/zerolog v1.34.0 h1:k43nTLIwcTVQAncfCw4KZ2VY6ukYoZaBPNOE8txlOeY= github.com/rs/zerolog v1.34.0/go.mod h1:bJsvje4Z08ROH4Nhs5iH600c3IkWhwp44iRc54W6wYQ= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/sebdah/goldie/v2 v2.5.3 h1:9ES/mNN+HNUbNWpVAlrzuZ7jE+Nrczbj8uFRjM7624Y= +github.com/sebdah/goldie/v2 v2.5.3/go.mod h1:oZ9fp0+se1eapSRjfYbsV/0Hqhbuu3bJVvKI/NNtssI= +github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= +github.com/sergi/go-diff v1.3.1/go.mod h1:aMJSSKb2lpPvRNec0+w3fl7LP9IOFzdc9Pa4NFbPK1I= github.com/sergi/go-diff v1.4.0 h1:n/SP9D5ad1fORl+llWyN+D6qoUETXNZARKjyY2/KVCw= github.com/sergi/go-diff v1.4.0/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= @@ -293,6 +307,9 @@ github.com/yargevad/filepathx v1.0.0 h1:SYcT+N3tYGi+NvazubCNlvgIPbzAk7i7y2dwg3I5 github.com/yargevad/filepathx v1.0.0/go.mod h1:BprfX/gpYNJHJfc35GjRRpVcwWXS89gGulUIU5tK3tA= github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4= github.com/yosida95/uritemplate/v3 v3.0.2/go.mod 
h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +github.com/yuin/goldmark v1.7.1 h1:3bajkSilaCbjdKVsKdZjZCLBNPL9pYzrCakKaf4U49U= +github.com/yuin/goldmark v1.7.1/go.mod h1:uzxRWxtg69N339t3louHJ7+O03ezfj6PlliRlaOzY1E= go.uber.org/mock v0.4.0 h1:VcM4ZOtdbR4f6VXfiOpwpVJDL6lCReaZ6mw31wqh7KU= go.uber.org/mock v0.4.0/go.mod h1:a6FSlNadKUHUa9IP5Vyt1zh4fC7uAwxMutEAscFbkZc= go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= @@ -301,6 +318,10 @@ golang.org/x/arch v0.11.0 h1:KXV8WWKCXm6tRpLirl2szsO5j/oOODwZf4hATmGVNs4= golang.org/x/arch v0.11.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= +golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M= +golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4= golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= @@ -309,6 +330,8 @@ golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1/go.mod h1:FXUEEKJgO7OQYeo8N0 golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/mod 
v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.27.0 h1:kb+q2PyFnEADO2IEF935ehFUXlWiNjJWtRNgBLSfbxQ= golang.org/x/mod v0.27.0/go.mod h1:rWI627Fq0DEoudcK+MBkNkCe0EetEaDSwJJkCcjpazc= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -316,6 +339,15 @@ golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= +golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= +golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= +golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -324,29 +356,60 @@ golang.org/x/oauth2 v0.30.0/go.mod 
h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKl golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw= golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.7.0/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY= +golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= +golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= +golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= golang.org/x/term v0.34.0 h1:O/2T7POpk0ZZ7MAzMeWFSg6S5IpWd/RXDlM9hgM3DR4= golang.org/x/term v0.34.0/go.mod h1:5jC53AEywhIVebHgPVeg0mj8OD3VO9OzclacVrqpaAw= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.15.0/go.mod 
h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng= golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg= golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= diff --git a/go-opencode/internal/tool/registry.go b/go-opencode/internal/tool/registry.go index 047dcec8d85..8eea04f8ce2 100644 --- a/go-opencode/internal/tool/registry.go +++ b/go-opencode/internal/tool/registry.go @@ -114,6 +114,7 @@ func DefaultRegistry(workDir string, store *storage.Storage) *Registry { r.Register(NewGlobTool(workDir)) r.Register(NewGrepTool(workDir)) r.Register(NewListTool(workDir)) + 
r.Register(NewWebFetchTool(workDir)) // Register todo tools r.Register(NewTodoWriteTool(workDir, store)) diff --git a/go-opencode/internal/tool/webfetch.go b/go-opencode/internal/tool/webfetch.go new file mode 100644 index 00000000000..6729cc7b5dc --- /dev/null +++ b/go-opencode/internal/tool/webfetch.go @@ -0,0 +1,239 @@ +package tool + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + "time" + + md "github.com/JohannesKaufmann/html-to-markdown" + "github.com/PuerkitoBio/goquery" + einotool "github.com/cloudwego/eino/components/tool" +) + +const webfetchDescription = `Fetches content from a specified URL and returns it in the requested format. + +Usage notes: + - IMPORTANT: If an MCP-provided web fetch tool is available, prefer using that tool instead of this one, as it may have fewer restrictions. + - The URL must be a fully-formed valid URL starting with http:// or https:// + - The URL is requested exactly as given; HTTP URLs are not upgraded to HTTPS + - This tool is read-only and does not modify any files + - Responses larger than 5MB are rejected with an error rather than truncated + - Use format "markdown" for readable content, "text" for plain text, "html" for raw HTML` + +const ( + maxResponseSize = 5 * 1024 * 1024 // 5MB + defaultTimeout = 30 * time.Second + maxTimeout = 120 * time.Second +) + +// WebFetchTool implements web content fetching. +type WebFetchTool struct { + workDir string + client *http.Client +} + +// WebFetchInput represents the input for the webfetch tool. +// SDK compatible: uses camelCase field names to match TypeScript. +type WebFetchInput struct { + URL string `json:"url"` + Format string `json:"format"` + Timeout int `json:"timeout,omitempty"` +} + +// NewWebFetchTool creates a new webfetch tool.
+func NewWebFetchTool(workDir string) *WebFetchTool {
+	return &WebFetchTool{
+		workDir: workDir,
+		client: &http.Client{
+			// Execute applies the per-request deadline through the request
+			// context. The client-wide timeout is only a hard ceiling, so it
+			// must not be lower than maxTimeout; otherwise caller-requested
+			// timeouts above it would be cut short.
+			Timeout: maxTimeout,
+		},
+	}
+}
+
+// ID returns the identifier under which the tool is registered.
+func (t *WebFetchTool) ID() string { return "webfetch" }
+
+// Description returns the usage text presented for the tool.
+func (t *WebFetchTool) Description() string { return webfetchDescription }
+
+// Parameters returns the JSON schema describing the tool input: a required
+// "url" and "format", and an optional "timeout" in seconds.
+func (t *WebFetchTool) Parameters() json.RawMessage {
+	return json.RawMessage(`{
+		"type": "object",
+		"properties": {
+			"url": {
+				"type": "string",
+				"description": "The URL to fetch content from"
+			},
+			"format": {
+				"type": "string",
+				"enum": ["text", "markdown", "html"],
+				"description": "The format to return the content in (text, markdown, or html)"
+			},
+			"timeout": {
+				"type": "integer",
+				"description": "Optional timeout in seconds (max 120)"
+			}
+		},
+		"required": ["url", "format"]
+	}`)
+}
+
+// webfetchAcceptHeader returns the Accept header sent for a supported format.
+// The second result is false when format is not "markdown", "text" or "html".
+func webfetchAcceptHeader(format string) (string, bool) {
+	switch format {
+	case "markdown":
+		return "text/markdown;q=1.0, text/x-markdown;q=0.9, text/plain;q=0.8, text/html;q=0.7, */*;q=0.1", true
+	case "text":
+		return "text/plain;q=1.0, text/markdown;q=0.9, text/html;q=0.8, */*;q=0.1", true
+	case "html":
+		return "text/html;q=1.0, application/xhtml+xml;q=0.9, text/plain;q=0.8, text/markdown;q=0.7, */*;q=0.1", true
+	}
+	return "", false
+}
+
+// webfetchTimeout converts a caller-supplied timeout in seconds into a
+// duration. Non-positive values select defaultTimeout and values at or above
+// the ceiling select maxTimeout. The comparison is done in seconds so that a
+// very large input cannot overflow time.Duration and turn into a negative
+// deadline.
+func webfetchTimeout(seconds int) time.Duration {
+	switch {
+	case seconds <= 0:
+		return defaultTimeout
+	case seconds >= int(maxTimeout/time.Second):
+		return maxTimeout
+	default:
+		return time.Duration(seconds) * time.Second
+	}
+}
+
+// Execute fetches params.URL with a GET request and returns the body in the
+// requested format.
+//
+// input is the JSON encoding of WebFetchInput. The URL must start with
+// http:// or https:// and the format must be "text", "markdown" or "html".
+// toolCtx is not used by this tool.
+//
+// An error is returned when the input is invalid, the request fails or times
+// out, the status code is outside 2xx, or the body exceeds maxResponseSize.
+// HTML responses are converted for the "markdown" and "text" formats; every
+// other combination returns the body unchanged.
+func (t *WebFetchTool) Execute(ctx context.Context, input json.RawMessage, toolCtx *Context) (*Result, error) {
+	var params WebFetchInput
+	if err := json.Unmarshal(input, &params); err != nil {
+		return nil, fmt.Errorf("invalid input: %w", err)
+	}
+
+	if !strings.HasPrefix(params.URL, "http://") && !strings.HasPrefix(params.URL, "https://") {
+		return nil, fmt.Errorf("URL must start with http:// or https://")
+	}
+
+	accept, ok := webfetchAcceptHeader(params.Format)
+	if !ok {
+		return nil, fmt.Errorf("format must be 'text', 'markdown', or 'html'")
+	}
+
+	// The deadline covers the whole exchange, including reading the body.
+	reqCtx, cancel := context.WithTimeout(ctx, webfetchTimeout(params.Timeout))
+	defer cancel()
+
+	req, err := http.NewRequestWithContext(reqCtx, "GET", params.URL, nil)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+
+	// A browser-like User-Agent is sent because some sites block unknown clients.
+	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
+	req.Header.Set("Accept-Language", "en-US,en;q=0.9")
+	req.Header.Set("Accept", accept)
+
+	resp, err := t.client.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("request failed: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		return nil, fmt.Errorf("request failed with status code: %d", resp.StatusCode)
+	}
+
+	// Reject early when the server announces an oversized body.
+	if resp.ContentLength > maxResponseSize {
+		return nil, fmt.Errorf("response too large (exceeds 5MB limit)")
+	}
+
+	// Content-Length may be absent or wrong, so the read itself is bounded.
+	// One extra byte is requested to tell "exactly at the limit" from "over".
+	body, err := io.ReadAll(io.LimitReader(resp.Body, maxResponseSize+1))
+	if err != nil {
+		return nil, fmt.Errorf("failed to read response: %w", err)
+	}
+	if len(body) > maxResponseSize {
+		return nil, fmt.Errorf("response too large (exceeds 5MB limit)")
+	}
+
+	content := string(body)
+	contentType := resp.Header.Get("Content-Type")
+
+	// Media types are case-insensitive, so compare against a lowered copy.
+	isHTML := strings.Contains(strings.ToLower(contentType), "text/html")
+
+	output := content
+	if isHTML {
+		switch params.Format {
+		case "markdown":
+			output, err = convertHTMLToMarkdown(content)
+			if err != nil {
+				return nil, fmt.Errorf("failed to convert HTML to markdown: %w", err)
+			}
+		case "text":
+			output, err = extractTextFromHTML(content)
+			if err != nil {
+				return nil, fmt.Errorf("failed to extract text from HTML: %w", err)
+			}
+		}
+	}
+
+	return &Result{
+		Title:    fmt.Sprintf("%s (%s)", params.URL, contentType),
+		Output:   output,
+		Metadata: map[string]any{},
+	}, nil
+}
+
+// EinoTool wraps the tool in an einoToolWrapper so it can be used as an Eino
+// invokable tool.
+func (t *WebFetchTool) EinoTool() einotool.InvokableTool {
+	return &einoToolWrapper{tool: t}
+}
+
+// extractTextFromHTML extracts plain text from HTML, removing scripts, styles, and other non-content elements.
+// The result has leading and trailing whitespace trimmed.
+func extractTextFromHTML(html string) (string, error) {
+	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
+	if err != nil {
+		return "", err
+	}
+
+	// Drop elements whose text is not page content before collecting text.
+	doc.Find("script, style, noscript, iframe, object, embed").Remove()
+
+	return strings.TrimSpace(doc.Text()), nil
+}
+
+// convertHTMLToMarkdown converts HTML content to Markdown format.
+func convertHTMLToMarkdown(html string) (string, error) { + converter := md.NewConverter("", true, &md.Options{ + HeadingStyle: "atx", + HorizontalRule: "---", + BulletListMarker: "-", + CodeBlockStyle: "fenced", + EmDelimiter: "*", + }) + + // Remove non-content elements + converter.Remove("script", "style", "meta", "link") + + markdown, err := converter.ConvertString(html) + if err != nil { + return "", err + } + + return markdown, nil +} diff --git a/go-opencode/internal/tool/webfetch_test.go b/go-opencode/internal/tool/webfetch_test.go new file mode 100644 index 00000000000..e7b9488f4fb --- /dev/null +++ b/go-opencode/internal/tool/webfetch_test.go @@ -0,0 +1,496 @@ +package tool + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" +) + +func TestWebFetchTool_Properties(t *testing.T) { + tool := NewWebFetchTool("/tmp") + + if tool.ID() != "webfetch" { + t.Errorf("Expected ID 'webfetch', got %q", tool.ID()) + } + + desc := tool.Description() + if !strings.Contains(desc, "URL") { + t.Error("Description should mention 'URL'") + } + + params := tool.Parameters() + if len(params) == 0 { + t.Error("Parameters should not be empty") + } + + // Verify JSON schema is valid + var schema map[string]any + if err := json.Unmarshal(params, &schema); err != nil { + t.Errorf("Parameters should be valid JSON: %v", err) + } + + // Check required properties + props, ok := schema["properties"].(map[string]any) + if !ok { + t.Error("Schema should have properties") + } + if _, ok := props["url"]; !ok { + t.Error("Schema should have url property") + } + if _, ok := props["format"]; !ok { + t.Error("Schema should have format property") + } +} + +func TestWebFetchTool_URLValidation(t *testing.T) { + tool := NewWebFetchTool("/tmp") + ctx := context.Background() + toolCtx := testContext() + + tests := []struct { + name string + url string + wantErr bool + errMsg string + }{ + {"valid https", "https://example.com", false, ""}, + {"valid 
http", "http://example.com", false, ""}, + {"missing protocol", "example.com", true, "http:// or https://"}, + {"ftp protocol", "ftp://example.com", true, "http:// or https://"}, + {"file protocol", "file:///etc/passwd", true, "http:// or https://"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create a mock server for valid URLs + if !tt.wantErr { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/plain") + w.Write([]byte("test content")) + })) + defer server.Close() + tt.url = server.URL + } + + input := json.RawMessage(`{"url": "` + tt.url + `", "format": "text"}`) + _, err := tool.Execute(ctx, input, toolCtx) + + if tt.wantErr { + if err == nil { + t.Errorf("Expected error for URL %q", tt.url) + } else if !strings.Contains(err.Error(), tt.errMsg) { + t.Errorf("Error should contain %q, got: %v", tt.errMsg, err) + } + } + }) + } +} + +func TestWebFetchTool_FormatValidation(t *testing.T) { + tool := NewWebFetchTool("/tmp") + ctx := context.Background() + toolCtx := testContext() + + tests := []struct { + format string + wantErr bool + }{ + {"text", false}, + {"markdown", false}, + {"html", false}, + {"json", true}, + {"xml", true}, + {"", true}, + } + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/plain") + w.Write([]byte("test")) + })) + defer server.Close() + + for _, tt := range tests { + t.Run(tt.format, func(t *testing.T) { + input := json.RawMessage(`{"url": "` + server.URL + `", "format": "` + tt.format + `"}`) + _, err := tool.Execute(ctx, input, toolCtx) + + if tt.wantErr && err == nil { + t.Errorf("Expected error for format %q", tt.format) + } + if !tt.wantErr && err != nil { + t.Errorf("Unexpected error for format %q: %v", tt.format, err) + } + }) + } +} + +func TestWebFetchTool_HTMLToMarkdown(t *testing.T) { + tool := NewWebFetchTool("/tmp") + ctx := 
context.Background() + toolCtx := testContext() + + htmlContent := ` + +Test + +

Hello World

+

This is a test paragraph.

+ + +` + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + w.Write([]byte(htmlContent)) + })) + defer server.Close() + + input := json.RawMessage(`{"url": "` + server.URL + `", "format": "markdown"}`) + result, err := tool.Execute(ctx, input, toolCtx) + if err != nil { + t.Fatalf("Execute failed: %v", err) + } + + // Check markdown conversion + if !strings.Contains(result.Output, "# Hello World") { + t.Error("Output should contain markdown heading") + } + if !strings.Contains(result.Output, "**test**") { + t.Error("Output should contain bold text") + } + if !strings.Contains(result.Output, "- Item 1") { + t.Error("Output should contain list items") + } +} + +func TestWebFetchTool_HTMLToText(t *testing.T) { + tool := NewWebFetchTool("/tmp") + ctx := context.Background() + toolCtx := testContext() + + htmlContent := ` + + +Test + + + + +

Hello World

+

This is a test.

+ + +` + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + w.Write([]byte(htmlContent)) + })) + defer server.Close() + + input := json.RawMessage(`{"url": "` + server.URL + `", "format": "text"}`) + result, err := tool.Execute(ctx, input, toolCtx) + if err != nil { + t.Fatalf("Execute failed: %v", err) + } + + // Check text extraction + if !strings.Contains(result.Output, "Hello World") { + t.Error("Output should contain heading text") + } + if !strings.Contains(result.Output, "This is a test") { + t.Error("Output should contain paragraph text") + } + + // Script content should be removed + if strings.Contains(result.Output, "alert") { + t.Error("Output should not contain script content") + } + if strings.Contains(result.Output, "console.log") { + t.Error("Output should not contain script content") + } + if strings.Contains(result.Output, "color: red") { + t.Error("Output should not contain style content") + } +} + +func TestWebFetchTool_HTMLPassthrough(t *testing.T) { + tool := NewWebFetchTool("/tmp") + ctx := context.Background() + toolCtx := testContext() + + htmlContent := `

Test

` + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + w.Write([]byte(htmlContent)) + })) + defer server.Close() + + input := json.RawMessage(`{"url": "` + server.URL + `", "format": "html"}`) + result, err := tool.Execute(ctx, input, toolCtx) + if err != nil { + t.Fatalf("Execute failed: %v", err) + } + + // HTML format should return raw HTML + if result.Output != htmlContent { + t.Errorf("Expected raw HTML, got %q", result.Output) + } +} + +func TestWebFetchTool_PlainTextPassthrough(t *testing.T) { + tool := NewWebFetchTool("/tmp") + ctx := context.Background() + toolCtx := testContext() + + plainContent := "This is plain text content." + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/plain") + w.Write([]byte(plainContent)) + })) + defer server.Close() + + // Test all formats with plain text - should all return as-is + formats := []string{"text", "markdown", "html"} + for _, format := range formats { + t.Run(format, func(t *testing.T) { + input := json.RawMessage(`{"url": "` + server.URL + `", "format": "` + format + `"}`) + result, err := tool.Execute(ctx, input, toolCtx) + if err != nil { + t.Fatalf("Execute failed: %v", err) + } + + if result.Output != plainContent { + t.Errorf("Format %s: Expected plain text passthrough, got %q", format, result.Output) + } + }) + } +} + +func TestWebFetchTool_HTTPError(t *testing.T) { + tool := NewWebFetchTool("/tmp") + ctx := context.Background() + toolCtx := testContext() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotFound) + })) + defer server.Close() + + input := json.RawMessage(`{"url": "` + server.URL + `", "format": "text"}`) + _, err := tool.Execute(ctx, input, toolCtx) + if err == nil { + t.Error("Expected error for 404 response") + } + if 
!strings.Contains(err.Error(), "404") { + t.Errorf("Error should mention status code, got: %v", err) + } +} + +func TestWebFetchTool_InvalidInput(t *testing.T) { + tool := NewWebFetchTool("/tmp") + ctx := context.Background() + toolCtx := testContext() + + // Invalid JSON + input := json.RawMessage(`{invalid json}`) + _, err := tool.Execute(ctx, input, toolCtx) + if err == nil { + t.Error("Expected error for invalid JSON input") + } +} + +func TestWebFetchTool_Timeout(t *testing.T) { + tool := NewWebFetchTool("/tmp") + ctx := context.Background() + toolCtx := testContext() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/plain") + w.Write([]byte("fast response")) + })) + defer server.Close() + + // Test with explicit timeout + input := json.RawMessage(`{"url": "` + server.URL + `", "format": "text", "timeout": 5}`) + result, err := tool.Execute(ctx, input, toolCtx) + if err != nil { + t.Fatalf("Execute failed: %v", err) + } + + if result.Output != "fast response" { + t.Errorf("Expected 'fast response', got %q", result.Output) + } +} + +func TestWebFetchTool_ResultMetadata(t *testing.T) { + tool := NewWebFetchTool("/tmp") + ctx := context.Background() + toolCtx := testContext() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html; charset=utf-8") + w.Write([]byte("Test")) + })) + defer server.Close() + + input := json.RawMessage(`{"url": "` + server.URL + `", "format": "text"}`) + result, err := tool.Execute(ctx, input, toolCtx) + if err != nil { + t.Fatalf("Execute failed: %v", err) + } + + // Check title format + if !strings.Contains(result.Title, server.URL) { + t.Error("Title should contain URL") + } + if !strings.Contains(result.Title, "text/html") { + t.Error("Title should contain content type") + } +} + +func TestWebFetchTool_EinoTool(t *testing.T) { + tool := NewWebFetchTool("/tmp") + 
einoTool := tool.EinoTool() + + if einoTool == nil { + t.Error("EinoTool should not return nil") + } + + info, err := einoTool.Info(context.Background()) + if err != nil { + t.Fatalf("Info failed: %v", err) + } + + if info.Name != "webfetch" { + t.Errorf("Expected name 'webfetch', got %q", info.Name) + } +} + +func TestExtractTextFromHTML(t *testing.T) { + tests := []struct { + name string + html string + wantText string + wantNot []string + }{ + { + name: "basic text", + html: "

Hello World

", + wantText: "Hello World", + wantNot: []string{}, + }, + { + name: "skip script", + html: "

Text

", + wantText: "Text", + wantNot: []string{"alert", "bad"}, + }, + { + name: "skip style", + html: "

Text

", + wantText: "Text", + wantNot: []string{"color", "red"}, + }, + { + name: "skip noscript", + html: "

Text

", + wantText: "Text", + wantNot: []string{"Enable JS"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := extractTextFromHTML(tt.html) + if err != nil { + t.Fatalf("extractTextFromHTML failed: %v", err) + } + + if !strings.Contains(result, tt.wantText) { + t.Errorf("Expected text %q not found in result: %q", tt.wantText, result) + } + + for _, notWant := range tt.wantNot { + if strings.Contains(result, notWant) { + t.Errorf("Unexpected text %q found in result: %q", notWant, result) + } + } + }) + } +} + +func TestConvertHTMLToMarkdown(t *testing.T) { + tests := []struct { + name string + html string + wantMD []string + wantNot []string + }{ + { + name: "heading", + html: "

Title

", + wantMD: []string{"# Title"}, + wantNot: []string{}, + }, + { + name: "bold", + html: "

Bold

", + wantMD: []string{"**Bold**"}, + wantNot: []string{}, + }, + { + name: "italic", + html: "

Italic

", + wantMD: []string{"*Italic*"}, + wantNot: []string{}, + }, + { + name: "list", + html: "", + wantMD: []string{"- Item 1", "- Item 2"}, + wantNot: []string{}, + }, + { + name: "skip script", + html: "

Text

", + wantMD: []string{"Text"}, + wantNot: []string{"bad", "script"}, + }, + { + name: "horizontal rule", + html: "

Above


Below

", + wantMD: []string{"---"}, + wantNot: []string{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := convertHTMLToMarkdown(tt.html) + if err != nil { + t.Fatalf("convertHTMLToMarkdown failed: %v", err) + } + + for _, want := range tt.wantMD { + if !strings.Contains(result, want) { + t.Errorf("Expected markdown %q not found in result: %q", want, result) + } + } + + for _, notWant := range tt.wantNot { + if strings.Contains(result, notWant) { + t.Errorf("Unexpected text %q found in result: %q", notWant, result) + } + } + }) + } +} From 7acf79d10589ecdc58f95ab16167adaa35799783 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 7 Dec 2025 19:20:13 +0000 Subject: [PATCH 2/3] refactor(go-opencode): Move SubagentExecutor to executor package Move SubagentExecutor from internal/tool to internal/executor to break the import cycle between tool and session packages. The cycle was: - tool/subagent_executor.go imported session - session/processor.go imported tool Now: - tool package no longer imports session - session package imports tool - executor package imports both tool and session This allows the tool package (including WebFetchTool) to build and test properly. 
--- go-opencode/cmd/opencode/commands/run.go | 3 +- go-opencode/cmd/opencode/commands/serve.go | 3 +- .../subagent.go} | 44 +++++++++---------- 3 files changed, 26 insertions(+), 24 deletions(-) rename go-opencode/internal/{tool/subagent_executor.go => executor/subagent.go} (92%) diff --git a/go-opencode/cmd/opencode/commands/run.go b/go-opencode/cmd/opencode/commands/run.go index a343409d42b..bf61d039daa 100644 --- a/go-opencode/cmd/opencode/commands/run.go +++ b/go-opencode/cmd/opencode/commands/run.go @@ -8,6 +8,7 @@ import ( "github.com/opencode-ai/opencode/internal/agent" "github.com/opencode-ai/opencode/internal/config" + "github.com/opencode-ai/opencode/internal/executor" "github.com/opencode-ai/opencode/internal/mcp" "github.com/opencode-ai/opencode/internal/permission" "github.com/opencode-ai/opencode/internal/provider" @@ -198,7 +199,7 @@ func runInteractive(cmd *cobra.Command, args []string) error { } // Create and configure SubagentExecutor for task tool - subagentExecutor := tool.NewSubagentExecutor(tool.SubagentExecutorConfig{ + subagentExecutor := executor.NewSubagentExecutor(executor.SubagentExecutorConfig{ Storage: store, ProviderRegistry: providerReg, ToolRegistry: toolReg, diff --git a/go-opencode/cmd/opencode/commands/serve.go b/go-opencode/cmd/opencode/commands/serve.go index 30476be7ddb..6d98e40aa6b 100644 --- a/go-opencode/cmd/opencode/commands/serve.go +++ b/go-opencode/cmd/opencode/commands/serve.go @@ -12,6 +12,7 @@ import ( "github.com/opencode-ai/opencode/internal/agent" "github.com/opencode-ai/opencode/internal/config" + "github.com/opencode-ai/opencode/internal/executor" "github.com/opencode-ai/opencode/internal/logging" "github.com/opencode-ai/opencode/internal/mcp" "github.com/opencode-ai/opencode/internal/provider" @@ -108,7 +109,7 @@ func runServe(cmd *cobra.Command, args []string) error { } // Create and configure SubagentExecutor for task tool - subagentExecutor := tool.NewSubagentExecutor(tool.SubagentExecutorConfig{ + 
subagentExecutor := executor.NewSubagentExecutor(executor.SubagentExecutorConfig{ Storage: store, ProviderRegistry: providerReg, ToolRegistry: toolReg, diff --git a/go-opencode/internal/tool/subagent_executor.go b/go-opencode/internal/executor/subagent.go similarity index 92% rename from go-opencode/internal/tool/subagent_executor.go rename to go-opencode/internal/executor/subagent.go index 850f120a087..758e30deefc 100644 --- a/go-opencode/internal/tool/subagent_executor.go +++ b/go-opencode/internal/executor/subagent.go @@ -1,5 +1,5 @@ -// Package tool provides tool implementations for the agentic loop. -package tool +// Package executor provides task execution implementations. +package executor import ( "context" @@ -17,17 +17,18 @@ import ( "github.com/opencode-ai/opencode/internal/provider" "github.com/opencode-ai/opencode/internal/session" "github.com/opencode-ai/opencode/internal/storage" + "github.com/opencode-ai/opencode/internal/tool" "github.com/opencode-ai/opencode/pkg/types" ) -// SubagentExecutor implements TaskExecutor to run subagent tasks. +// SubagentExecutor implements tool.TaskExecutor to run subagent tasks. 
type SubagentExecutor struct { - storage *storage.Storage - providerRegistry *provider.Registry - toolRegistry *Registry + storage *storage.Storage + providerRegistry *provider.Registry + toolRegistry *tool.Registry permissionChecker *permission.Checker - agentRegistry *agent.Registry - workDir string + agentRegistry *agent.Registry + workDir string // Default provider and model settings defaultProviderID string @@ -38,7 +39,7 @@ type SubagentExecutor struct { type SubagentExecutorConfig struct { Storage *storage.Storage ProviderRegistry *provider.Registry - ToolRegistry *Registry + ToolRegistry *tool.Registry PermissionChecker *permission.Checker AgentRegistry *agent.Registry WorkDir string @@ -60,15 +61,15 @@ func NewSubagentExecutor(cfg SubagentExecutorConfig) *SubagentExecutor { } } -// ExecuteSubtask implements TaskExecutor.ExecuteSubtask. +// ExecuteSubtask implements tool.TaskExecutor.ExecuteSubtask. // It creates a child session, runs the subagent, and returns the result. func (e *SubagentExecutor) ExecuteSubtask( ctx context.Context, parentSessionID string, agentName string, prompt string, - opts TaskOptions, -) (*TaskResult, error) { + opts tool.TaskOptions, +) (*tool.TaskResult, error) { // Get the agent configuration agentConfig, err := e.agentRegistry.Get(agentName) if err != nil { @@ -119,7 +120,7 @@ func (e *SubagentExecutor) ExecuteSubtask( }) if err != nil { - return &TaskResult{ + return &tool.TaskResult{ Output: fmt.Sprintf("Error executing subtask: %s", err.Error()), SessionID: childSession.ID, Error: err.Error(), @@ -133,7 +134,7 @@ func (e *SubagentExecutor) ExecuteSubtask( // Extract text content from response output := extractTextContent(responseParts) - return &TaskResult{ + return &tool.TaskResult{ Output: output, SessionID: childSession.ID, AgentID: agentName, @@ -173,7 +174,7 @@ func (e *SubagentExecutor) createChildSession(ctx context.Context, parentSession // Create project ID from directory projectID := hashDirectory(directory) - 
session := &types.Session{ + sess := &types.Session{ ID: sessionID, ProjectID: projectID, Directory: directory, @@ -191,17 +192,17 @@ func (e *SubagentExecutor) createChildSession(ctx context.Context, parentSession }, } - if err := e.storage.Put(ctx, []string{"session", projectID, session.ID}, session); err != nil { + if err := e.storage.Put(ctx, []string{"session", projectID, sess.ID}, sess); err != nil { return nil, fmt.Errorf("failed to save child session: %w", err) } // Publish session created event event.PublishSync(event.Event{ Type: event.SessionCreated, - Data: event.SessionCreatedData{Info: session}, + Data: event.SessionCreatedData{Info: sess}, }) - return session, nil + return sess, nil } // createUserMessage creates a user message with the prompt. @@ -216,9 +217,9 @@ func (e *SubagentExecutor) createUserMessage( msgID := ulid.Make().String() msg := &types.Message{ - ID: msgID, - SessionID: sess.ID, - Role: "user", + ID: msgID, + SessionID: sess.ID, + Role: "user", ProviderID: providerID, ModelID: modelID, Model: &types.ModelRef{ @@ -368,7 +369,6 @@ func extractTextContent(parts []types.Part) string { } // hashDirectory creates a project ID from a directory path. -// This is duplicated from session package to avoid circular imports. func hashDirectory(directory string) string { h := sha256.New() h.Write([]byte(directory)) From 85fccd9cc6f77e585a42bee3bd26a715cb3ff490 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 7 Dec 2025 19:32:17 +0000 Subject: [PATCH 3/3] docs(go-opencode): Add architecture.md with package dependencies Document the package structure, dependencies, and design principles for go-opencode. 
Includes: - ASCII dependency graph - Package descriptions for all 18 internal packages - Public API and command packages - Dependency statistics and hub packages - Key design principles - External dependency list --- go-opencode/architecture.md | 156 ++++++++++++++++++++++++++++++++++++ 1 file changed, 156 insertions(+) create mode 100644 go-opencode/architecture.md diff --git a/go-opencode/architecture.md b/go-opencode/architecture.md new file mode 100644 index 00000000000..dcf80cc6719 --- /dev/null +++ b/go-opencode/architecture.md @@ -0,0 +1,156 @@ +# Go-OpenCode Architecture + +This document describes the package structure and dependencies of the Go OpenCode implementation. + +## Package Dependency Graph + +``` + ┌─────────────┐ + │ server │ ◄── HTTP API Layer + └──────┬──────┘ + │ + ┌────────────────────────────────┼────────────────────────────────┐ + │ │ │ + ▼ ▼ ▼ + ┌───────────┐ ┌───────────┐ ┌───────────┐ + │ session │ │ mcp │ │ executor │ + └─────┬─────┘ └─────┬─────┘ └─────┬─────┘ + │ │ │ + │ ┌─────────────────────┴────────────────────────────────┤ + │ │ │ + ▼ ▼ ▼ + ┌───────────────────┐ ┌───────────┐ + │ tool │ ◄────────────────────────────────────── │ provider │ + └─────────┬─────────┘ └───────────┘ + │ + ┌─────────┴─────────┬────────────────┐ + │ │ │ + ▼ ▼ ▼ +┌─────────┐ ┌───────────┐ ┌───────────┐ +│ agent │ │ storage │ │ event │ +└────┬────┘ └───────────┘ └───────────┘ + │ + ▼ +┌────────────┐ +│ permission │ +└────────────┘ +``` + +## Package Descriptions + +### Command Layer (`cmd/`) + +| Package | Description | +|---------|-------------| +| `cmd/opencode` | Main CLI application entry point. Provides `run` and `serve` commands. | +| `cmd/calculator-mcp` | Example MCP server implementation for testing. | + +### Public API (`pkg/`) + +| Package | Description | +|---------|-------------| +| `pkg/types` | Shared type definitions used across the codebase (Session, Message, Part, etc.). 
| +| `pkg/mcpserver` | MCP Server implementation utilities for building MCP servers. | + +### Internal Packages (`internal/`) + +#### Foundation Layer (No internal dependencies) + +| Package | Description | +|---------|-------------| +| `event` | Type-safe pub/sub event system. Enables decoupled communication between components for session events, message updates, and tool execution notifications. | +| `logging` | Structured logging using zerolog. Provides consistent logging across all packages. | +| `storage` | File-based JSON storage matching TypeScript implementation. Provides persistent storage for sessions, messages, and parts. | +| `formatter` | Code formatting integration. Supports automatic code formatting via external tools. | +| `lsp` | Language Server Protocol client. Provides code intelligence and symbol search capabilities. | +| `sharing` | Session sharing and collaboration features. | +| `command` | Flexible command execution system. Supports templated commands with variable substitution from configuration or markdown files. | + +#### Configuration Layer + +| Package | Description | Dependencies | +|---------|-------------|--------------| +| `config` | Configuration loading, merging, and path management. Handles hierarchical loading from multiple sources (global, project, environment) with TypeScript compatibility. | `logging` | + +#### Permission & Safety Layer + +| Package | Description | Dependencies | +|---------|-------------|--------------| +| `permission` | Comprehensive permission control system. Manages user consent for file editing, bash commands, web fetching, and external directory access. | `event` | +| `agent` | Multi-agent configuration and management. Implements flexible agent system with different operation modes (primary/subagent) and tool access controls. 
| `permission` | + +#### Tool & Execution Layer + +| Package | Description | Dependencies | +|---------|-------------|--------------| +| `tool` | Tool registry and execution framework. Manages tool registration, lookup, and execution. Includes built-in tools: Read, Write, Edit, Bash, Glob, Grep, List, WebFetch, TodoRead, TodoWrite, Task. | `agent`, `event`, `permission`, `storage` | +| `mcp` | Model Context Protocol (MCP) client. Connects to external MCP servers and exposes their tools, resources, and prompts to the LLM. | `tool` | +| `clienttool` | Registry for client-side tools. Enables external clients to register and execute tools via HTTP API. | `event` | +| `executor` | Task execution implementations. Runs subagent tasks by creating child sessions and managing their lifecycle. | `agent`, `event`, `permission`, `provider`, `session`, `storage`, `tool` | + +#### Provider Layer + +| Package | Description | Dependencies | +|---------|-------------|--------------| +| `provider` | LLM provider abstraction layer using Eino framework. Supports Anthropic (Claude), OpenAI (GPT), and Volcengine ARK models. Handles streaming, tool calls, and message formatting. | (none) | + +#### Session Layer + +| Package | Description | Dependencies | +|---------|-------------|--------------| +| `session` | Core agentic loop and session management. Manages conversations, message processing, tool execution, and session state. Implements the main LLM interaction loop with tool calling. | `event`, `logging`, `permission`, `provider`, `storage`, `tool` | + +#### Integration Layer + +| Package | Description | Dependencies | +|---------|-------------|--------------| +| `vcs` | Version control system (Git) integration. Provides repository status, diff tracking, and file change monitoring. | `event` | + +#### API Layer + +| Package | Description | Dependencies | +|---------|-------------|--------------| +| `server` | HTTP server implementation for OpenCode API. 
Provides RESTful endpoints for sessions, messages, files, config, events (SSE), MCP management, and client tools. | `clienttool`, `command`, `event`, `formatter`, `logging`, `lsp`, `mcp`, `provider`, `session`, `storage`, `tool`, `vcs` | + +## Dependency Statistics + +### Most Depended Upon (Foundation Packages) + +1. **event** - 6 dependents (core pub/sub infrastructure) +2. **tool** - 5 dependents (tool execution framework) +3. **storage** - 4 dependents (persistence layer) +4. **provider** - 4 dependents (LLM abstraction) +5. **permission** - 4 dependents (safety controls) + +### Hub Packages (High Fan-out) + +1. **server** - imports 12 internal packages (API orchestration) +2. **executor** - imports 7 internal packages (subagent coordination) +3. **session** - imports 6 internal packages (agentic loop) + +## Key Design Principles + +1. **Layered Architecture**: Foundation packages (event, storage, logging) support higher-level abstractions (session, server). + +2. **No Circular Dependencies**: The import cycle between `tool` and `session` was resolved by extracting `executor` as a separate package. + +3. **Event-Driven Communication**: Components communicate via the `event` package for loose coupling. + +4. **Permission-First Design**: All potentially dangerous operations (file writes, bash commands) go through the `permission` package. + +5. **Provider Abstraction**: LLM providers are abstracted behind a common interface, enabling easy addition of new providers. 
+ +## External Dependencies + +Key external packages used: + +| Package | Purpose | +|---------|---------| +| `github.com/cloudwego/eino` | LLM framework for provider abstraction | +| `github.com/go-chi/chi/v5` | HTTP router | +| `github.com/rs/zerolog` | Structured logging | +| `github.com/spf13/cobra` | CLI framework | +| `github.com/mark3labs/mcp-go` | MCP protocol implementation | +| `github.com/JohannesKaufmann/html-to-markdown` | HTML to Markdown conversion (WebFetch tool) | +| `github.com/PuerkitoBio/goquery` | HTML parsing and text extraction (WebFetch tool) | +| `github.com/sergi/go-diff` | Diff computation for file changes |