diff --git a/Cargo.toml b/Cargo.toml
index a9e2388..48b0121 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,11 +10,13 @@ license = "MIT/Apache-2.0"
repository = "https://github.com/blocklessnetwork/sdk-rust"
[dependencies]
+htmd = { version = "0.2.2", default-features = false }
json = { version = "0.12", default-features = false }
+kuchikiki = { version = "0.8", default-features = false }
+regex = { version = "1.11.1", default-features = false, features = ["unicode-case"] }
serde = { version = "1.0", features = ["derive"], optional = true }
-
-[dev-dependencies]
-serde_json = "1.0"
+serde_json = { version = "1.0", default-features = false, features = ["alloc"] }
+url = { version = "2.5", default-features = false }
[features]
default = ["serde"]
diff --git a/README.md b/README.md
index 8e7d9dc..46a799e 100644
--- a/README.md
+++ b/README.md
@@ -87,7 +87,7 @@ cargo build --release --target wasm32-wasip1 --example llm-mcp
| [httpbin](./examples/httpbin.rs) | HTTP to query anything from httpbin | ✅ | ✅ |
| [llm](./examples/llm.rs) | LLM to chat with `Llama-3.1-8B-Instruct-q4f32_1-MLC` and `SmolLM2-1.7B-Instruct-q4f16_1-MLC` models | ✅ | ✅ |
| [llm-mcp](./examples/llm-mcp.rs) | LLM with MCP (Model Control Protocol) demonstrating tool integration using SSE endpoints | ✅ | ✅ |
+| [web-scrape](./examples/web-scrape.rs) | Web Scraping to scrape, map links, and crawl pages from a URL using BlessCrawl | ✅ | ❌ |
 
## Testing
diff --git a/examples/web-scrape.rs b/examples/web-scrape.rs
new file mode 100644
index 0000000..d4165a3
--- /dev/null
+++ b/examples/web-scrape.rs
@@ -0,0 +1,93 @@
+use blockless_sdk::*;
+
+/// This example demonstrates how to use the Blockless SDK to perform web scraping
+/// using the BlessCrawl functionality.
+///
+/// It shows how to:
+/// - Create a BlessCrawl instance with default configuration
+/// - Scrape content from a single URL and print the result as markdown
+/// - Map links from a webpage to discover available URLs
+/// - Recursively crawl a website with depth and path filtering options
+/// - Handle errors and responses appropriately
+fn main() {
+ println!("=== Blockless Web Scraping SDK Example ===\n");
+
+ example_scraping();
+ example_mapping();
+ example_crawling();
+}
+
+fn example_scraping() {
+ println!("--- Example 1: Basic Web Scraping ---");
+
+ let url = "https://example.com";
+ println!("scraping: {}...", url);
+
+    // Scrape with the default configuration
+ let response = BlessCrawl::default()
+ .scrape(url, None)
+ .expect("Failed to scrape");
+ println!("response with default config: {:?}", response);
+ println!();
+ println!(
+ "---------- markdown ----------\n{}\n------------------------------",
+ response.data.content
+ );
+}
+
+fn example_mapping() {
+ println!("--- Example 2: Link Mapping/Discovery ---");
+
+ let url = "https://example.com";
+ println!("Mapping links from: {}", url);
+
+ let options = MapOptions::new()
+ .with_link_types(vec!["internal".to_string(), "external".to_string()])
+ .with_base_url(url.to_string())
+ .with_filter_extensions(vec![".html".to_string(), ".htm".to_string()]);
+
+ let response = BlessCrawl::default()
+ .map(url, Some(options))
+ .expect("Failed to map");
+ println!("response: {:?}", response);
+ println!();
+ println!(
+ "------------ links ------------\n{:?}\n------------------------------",
+ response.data.links
+ );
+ println!();
+ println!(
+ "------------ total links ------------\n{}\n------------------------------",
+ response.data.total_links
+ );
+}
+
+fn example_crawling() {
+ println!("--- Example 3: Recursive Website Crawling ---");
+
+ let url = "https://example.com";
+ println!("Crawling website: {}", url);
+
+ let options = CrawlOptions::new()
+ .with_max_depth(2)
+ .with_limit(10)
+ .with_include_paths(vec!["/".to_string()])
+ .with_exclude_paths(vec!["/admin/".to_string(), "/api/".to_string()])
+ .with_follow_external(false)
+ .with_delay_between_requests(1000)
+ .with_parallel_requests(3);
+
+ let response = BlessCrawl::default()
+ .crawl(url, Some(options))
+ .expect("Failed to crawl");
+ println!("response: {:?}", response);
+ println!();
+ println!(
+ "------------ pages ------------\n{:?}\n------------------------------",
+ response.data.pages
+ );
+ println!();
+ println!(
+ "------------ total pages ------------\n{}\n------------------------------",
+ response.data.total_pages
+ );
+}
diff --git a/src/bless_crawl/html_to_markdown.rs b/src/bless_crawl/html_to_markdown.rs
new file mode 100644
index 0000000..9137634
--- /dev/null
+++ b/src/bless_crawl/html_to_markdown.rs
@@ -0,0 +1,119 @@
+use htmd::HtmlToMarkdown;
+use regex::Regex;
+
+/// Parses HTML content and converts it to Markdown
+///
+/// This function replicates the behavior of the JavaScript parseMarkdown function:
+/// - Converts HTML to Markdown using htmd
+/// - Processes multi-line links by escaping newlines inside link content
+/// - Removes "Skip to Content" links
+/// - Returns an empty string for empty input
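+///
+/// A rough illustration of the expected behavior (shown as an `ignore` block since this
+/// module is private and not compiled as a doctest):
+///
+/// ```ignore
+/// // Simple HTML becomes plain markdown text; empty input stays empty.
+/// assert_eq!(parse_markdown("<p>Hello, world!</p>").trim(), "Hello, world!");
+/// assert_eq!(parse_markdown(""), "");
+/// ```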
+pub fn parse_markdown(html: &str) -> String {
+ if html.is_empty() {
+ return String::new();
+ }
+
+ // Convert HTML to Markdown using htmd
+ let markdown = match HtmlToMarkdown::new().convert(html) {
+ Ok(md) => md,
+ Err(_) => {
+ // Return empty string if conversion fails
+ return String::new();
+ }
+ };
+
+ // Process the markdown content
+ let processed_markdown = process_multiline_links(&markdown);
+ remove_skip_to_content_links(&processed_markdown)
+}
+
+/// Processes multi-line links by escaping newlines inside link content
+///
+/// This function replicates the JavaScript processMultiLineLinks function:
+/// - Tracks when we're inside link content (between [ and ])
+/// - Escapes newlines with backslash when inside links
+fn process_multiline_links(markdown_content: &str) -> String {
+ let mut new_markdown_content = String::new();
+ let mut link_open_count: usize = 0;
+
+ for ch in markdown_content.chars() {
+ match ch {
+ '[' => {
+ link_open_count += 1;
+ }
+ ']' => {
+ link_open_count = link_open_count.saturating_sub(1);
+ }
+ _ => {}
+ }
+
+ let inside_link_content = link_open_count > 0;
+
+ if inside_link_content && ch == '\n' {
+ new_markdown_content.push('\\');
+ new_markdown_content.push('\n');
+ } else {
+ new_markdown_content.push(ch);
+ }
+ }
+
+ new_markdown_content
+}
+
+/// Removes "Skip to Content" links from the markdown content
+///
+/// This function replicates the JavaScript removeSkipToContentLinks function:
+/// - Removes [Skip to Content](#page) and [Skip to content](#skip) patterns
+/// - Case-insensitive matching
+fn remove_skip_to_content_links(markdown_content: &str) -> String {
+ let re = Regex::new(r"(?i)\[Skip to Content\]\(#[^)]*\)").unwrap();
+ re.replace_all(markdown_content, "").to_string()
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_parse_markdown_simple() {
+ let html = "
Hello, world!
";
+ let result = parse_markdown(html);
+ assert_eq!(result.trim(), "Hello, world!");
+ }
+
+ #[test]
+ fn test_parse_markdown_complex() {
+        let html = "<p>Hello <strong>bold</strong> world!</p><ul><li>List item</li></ul>";
+ let result = parse_markdown(html);
+ assert_eq!(result.trim(), "Hello **bold** world!\n\n* List item");
+ }
+
+ #[test]
+ fn test_parse_markdown_empty() {
+ let html = "";
+ let result = parse_markdown(html);
+ assert_eq!(result, "");
+ }
+
+ #[test]
+ fn test_process_multiline_links() {
+ let markdown = "[Link\nwith newline](http://example.com)";
+ let result = process_multiline_links(markdown);
+ assert_eq!(result, "[Link\\\nwith newline](http://example.com)");
+ }
+
+ #[test]
+ fn test_remove_skip_to_content_links() {
+ let markdown = "Some content [Skip to Content](#page) more content";
+ let result = remove_skip_to_content_links(markdown);
+ assert_eq!(result, "Some content more content");
+ }
+
+ #[test]
+ fn test_remove_skip_to_content_links_case_insensitive() {
+ let markdown = "Some content [Skip to content](#skip) more content";
+ let result = remove_skip_to_content_links(markdown);
+ assert_eq!(result, "Some content more content");
+ }
+}
diff --git a/src/bless_crawl/html_transform.rs b/src/bless_crawl/html_transform.rs
new file mode 100644
index 0000000..8c56ebe
--- /dev/null
+++ b/src/bless_crawl/html_transform.rs
@@ -0,0 +1,374 @@
+use kuchikiki::{parse_html, traits::TendrilSink};
+use serde::{Deserialize, Serialize};
+use url::Url;
+
+const EXCLUDE_NON_MAIN_TAGS: [&str; 41] = [
+ "header",
+ "footer",
+ "nav",
+ "aside",
+ ".header",
+ ".top",
+ ".navbar",
+ "#header",
+ ".footer",
+ ".bottom",
+ "#footer",
+ ".sidebar",
+ ".side",
+ ".aside",
+ "#sidebar",
+ ".modal",
+ ".popup",
+ "#modal",
+ ".overlay",
+ ".ad",
+ ".ads",
+ ".advert",
+ "#ad",
+ ".lang-selector",
+ ".language",
+ "#language-selector",
+ ".social",
+ ".social-media",
+ ".social-links",
+ "#social",
+ ".menu",
+ ".navigation",
+ "#nav",
+ ".breadcrumbs",
+ "#breadcrumbs",
+ ".share",
+ "#share",
+ ".widget",
+ "#widget",
+ ".cookie",
+ "#cookie",
+];
+
+const FORCE_INCLUDE_MAIN_TAGS: [&str; 13] = [
+ "#main",
+    // swoogo event software marks all of its content with .widget classes
+ ".swoogo-cols",
+ ".swoogo-text",
+ ".swoogo-table-div",
+ ".swoogo-space",
+ ".swoogo-alert",
+ ".swoogo-sponsors",
+ ".swoogo-title",
+ ".swoogo-tabs",
+ ".swoogo-logo",
+ ".swoogo-image",
+ ".swoogo-button",
+ ".swoogo-agenda",
+];
+
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct TransformHtmlOptions {
+ pub html: String,
+ pub url: String,
+    pub include_tags: Vec<String>,
+    pub exclude_tags: Vec<String>,
+ pub only_main_content: bool,
+}
+
+#[derive(Debug)]
+struct ImageSource {
+ url: String,
+ size: i32,
+ is_x: bool,
+}
+
+#[derive(Debug)]
+pub enum HtmlTransformError {
+ ParseError,
+ UrlParseError,
+ SelectError,
+}
+
+impl std::fmt::Display for HtmlTransformError {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self {
+ HtmlTransformError::ParseError => write!(f, "Failed to parse HTML"),
+ HtmlTransformError::UrlParseError => write!(f, "Failed to parse URL"),
+ HtmlTransformError::SelectError => write!(f, "Failed to select HTML elements"),
+ }
+ }
+}
+
+impl std::error::Error for HtmlTransformError {}
+
+/// Transforms HTML by removing unwanted elements, filtering tags, and processing URLs
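+///
+/// A minimal sketch of how the options fit together (an `ignore` block; the exact output
+/// string depends on how the document is re-serialized):
+///
+/// ```ignore
+/// let cleaned = transform_html(TransformHtmlOptions {
+///     html: "<div><script>var x = 1;</script><a href=\"/docs\">Docs</a></div>".to_string(),
+///     url: "https://example.com".to_string(),
+///     include_tags: vec![],
+///     exclude_tags: vec![],
+///     only_main_content: false,
+/// })
+/// .unwrap();
+/// // The <script> element is stripped and the relative href is resolved
+/// // against https://example.com before the document is serialized back to HTML.
+/// ```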
+pub fn transform_html(opts: TransformHtmlOptions) -> Result<String, HtmlTransformError> {
+ let mut document = parse_html().one(opts.html);
+
+ // If include_tags is specified, only include those tags
+ if !opts.include_tags.is_empty() {
+        let new_document = parse_html().one("<div></div>");
+ let root = new_document
+ .select_first("div")
+ .map_err(|_| HtmlTransformError::SelectError)?;
+
+ for tag_selector in opts.include_tags.iter() {
+ let matching_nodes: Vec<_> = document
+ .select(tag_selector)
+ .map_err(|_| HtmlTransformError::SelectError)?
+ .collect();
+ for tag in matching_nodes {
+ root.as_node().append(tag.as_node().clone());
+ }
+ }
+
+ document = new_document;
+ }
+
+ // Remove unwanted elements
+ let unwanted_selectors = ["head", "meta", "noscript", "style", "script"];
+ for selector in &unwanted_selectors {
+ while let Ok(element) = document.select_first(selector) {
+ element.as_node().detach();
+ }
+ }
+
+ // Remove excluded tags
+ for tag_selector in opts.exclude_tags.iter() {
+ while let Ok(element) = document.select_first(tag_selector) {
+ element.as_node().detach();
+ }
+ }
+
+ // Remove non-main content if requested
+ if opts.only_main_content {
+ for selector in EXCLUDE_NON_MAIN_TAGS.iter() {
+ let elements: Vec<_> = document
+ .select(selector)
+ .map_err(|_| HtmlTransformError::SelectError)?
+ .collect();
+ for element in elements {
+ // Check if this element contains any force-include tags
+ let should_keep = FORCE_INCLUDE_MAIN_TAGS.iter().any(|force_selector| {
+ element
+ .as_node()
+ .select(force_selector)
+ .map(|mut iter| iter.next().is_some())
+ .unwrap_or(false)
+ });
+
+ if !should_keep {
+ element.as_node().detach();
+ }
+ }
+ }
+ }
+
+ // Process images with srcset attributes
+ let srcset_images: Vec<_> = document
+ .select("img[srcset]")
+ .map_err(|_| HtmlTransformError::SelectError)?
+ .collect();
+
+ for img in srcset_images {
+ let srcset = img.attributes.borrow().get("srcset").map(|s| s.to_string());
+ if let Some(srcset) = srcset {
+            let mut sizes: Vec<ImageSource> = srcset
+ .split(',')
+ .filter_map(|entry| {
+ let tokens: Vec<&str> = entry.trim().split(' ').collect();
+ if tokens.is_empty() {
+ return None;
+ }
+
+ let size_token = if tokens.len() > 1 && !tokens[1].is_empty() {
+ tokens[1]
+ } else {
+ "1x"
+ };
+
+ if let Ok(parsed_size) = size_token[..size_token.len() - 1].parse() {
+ Some(ImageSource {
+ url: tokens[0].to_string(),
+ size: parsed_size,
+ is_x: size_token.ends_with('x'),
+ })
+ } else {
+ None
+ }
+ })
+ .collect();
+
+ // Add src attribute as 1x if all sizes are x-based
+ if sizes.iter().all(|s| s.is_x) {
+ let src = img.attributes.borrow().get("src").map(|s| s.to_string());
+ if let Some(src) = src {
+ sizes.push(ImageSource {
+ url: src,
+ size: 1,
+ is_x: true,
+ });
+ }
+ }
+
+ // Sort by size (largest first) and use the biggest image
+ sizes.sort_by(|a, b| b.size.cmp(&a.size));
+ if let Some(biggest) = sizes.first() {
+ img.attributes
+ .borrow_mut()
+ .insert("src", biggest.url.clone());
+ }
+ }
+ }
+
+ // Convert relative URLs to absolute URLs
+ let base_url = Url::parse(&opts.url).map_err(|_| HtmlTransformError::UrlParseError)?;
+
+ // Process image src attributes
+ let src_images: Vec<_> = document
+ .select("img[src]")
+ .map_err(|_| HtmlTransformError::SelectError)?
+ .collect();
+ for img in src_images {
+ let old_src = img.attributes.borrow().get("src").map(|s| s.to_string());
+ if let Some(old_src) = old_src {
+ if let Ok(new_url) = base_url.join(&old_src) {
+ img.attributes
+ .borrow_mut()
+ .insert("src", new_url.to_string());
+ }
+ }
+ }
+
+ // Process anchor href attributes
+ let href_anchors: Vec<_> = document
+ .select("a[href]")
+ .map_err(|_| HtmlTransformError::SelectError)?
+ .collect();
+ for anchor in href_anchors {
+ let old_href = anchor
+ .attributes
+ .borrow()
+ .get("href")
+ .map(|s| s.to_string());
+ if let Some(old_href) = old_href {
+ if let Ok(new_url) = base_url.join(&old_href) {
+ anchor
+ .attributes
+ .borrow_mut()
+ .insert("href", new_url.to_string());
+ }
+ }
+ }
+
+ Ok(document.to_string())
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_transform_html_removes_unwanted_elements() {
+ let opts = TransformHtmlOptions {
+ html: "TestContent
".to_string(),
+ url: "https://example.com".to_string(),
+ include_tags: vec![],
+ exclude_tags: vec![],
+ only_main_content: false,
+ };
+
+ let result = transform_html(opts).unwrap();
+ let expected = "Content
";
+ assert_eq!(result, expected);
+ }
+
+ #[test]
+ fn test_transform_html_include_tags() {
+ let opts = TransformHtmlOptions {
+ html: "Keep this
".to_string(),
+ url: "https://example.com".to_string(),
+ include_tags: vec![".content".to_string()],
+ exclude_tags: vec![],
+ only_main_content: false,
+ };
+
+ let result = transform_html(opts).unwrap();
+        let expected = "<html><body><div><div class=\"content\">Keep this</div></div></body></html>";
+ assert_eq!(result, expected);
+ }
+
+ #[test]
+ fn test_transform_html_exclude_tags() {
+ let opts = TransformHtmlOptions {
+ html: "Keep this
Remove this
".to_string(),
+ url: "https://example.com".to_string(),
+ include_tags: vec![],
+ exclude_tags: vec![".ad".to_string()],
+ only_main_content: false,
+ };
+
+ let result = transform_html(opts).unwrap();
+ let expected = "Keep this
";
+ assert_eq!(result, expected);
+ }
+
+ #[test]
+ fn test_transform_html_relative_urls() {
+ let opts = TransformHtmlOptions {
+ html: r#"
Link"#
+ .to_string(),
+ url: "https://example.com/subdir/".to_string(),
+ include_tags: vec![],
+ exclude_tags: vec![],
+ only_main_content: false,
+ };
+
+ let result = transform_html(opts).unwrap();
+ let expected = r#"
Link"#;
+ assert_eq!(result, expected);
+ }
+
+ #[test]
+ fn test_transform_html_only_main_content() {
+ let opts = TransformHtmlOptions {
+ html: "Main content
".to_string(),
+ url: "https://example.com".to_string(),
+ include_tags: vec![],
+ exclude_tags: vec![],
+ only_main_content: true,
+ };
+
+ let result = transform_html(opts).unwrap();
+ let expected = "Main content
";
+ assert_eq!(result, expected);
+ }
+
+ #[test]
+ fn test_transform_html_srcset_processing() {
+ let opts = TransformHtmlOptions {
+ html: r#"
"#.to_string(),
+ url: "https://example.com".to_string(),
+ include_tags: vec![],
+ exclude_tags: vec![],
+ only_main_content: false,
+ };
+
+ let result = transform_html(opts).unwrap();
+ let expected = r#"
"#;
+ assert_eq!(result, expected);
+ }
+
+ #[test]
+ fn test_transform_html_force_include_tags() {
+ let opts = TransformHtmlOptions {
+ html: r#""#.to_string(),
+ url: "https://example.com".to_string(),
+ include_tags: vec![],
+ exclude_tags: vec![],
+ only_main_content: true,
+ };
+
+ let result = transform_html(opts).unwrap();
+ let expected = r#""#;
+ assert_eq!(result, expected);
+ }
+}
diff --git a/src/bless_crawl/mod.rs b/src/bless_crawl/mod.rs
new file mode 100644
index 0000000..8a3ffc2
--- /dev/null
+++ b/src/bless_crawl/mod.rs
@@ -0,0 +1,746 @@
+//! # BlessCrawl - Distributed Web Scraping SDK
+//!
+//! Provides distributed web scraping across the BLESS network's browser nodes.
+//!
+//! ## Features
+//!
+//! - **scrape()**: Extract content from a URL as markdown
+//! - **map()**: Discover and extract all links from a webpage
+//! - **crawl()**: Recursively crawl websites with depth controls
+//!
+//! ## Limits
+//!
+//! - Timeout: 15s default, 120s max
+//! - Wait time: 3s default, 20s max
+//! - Buffer sizes: 2MB (scrape), 1MB (map), 8MB (crawl)
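+//!
+//! ## Example
+//!
+//! A minimal usage sketch (marked `ignore` because the host imports backing these calls are
+//! only available inside the Blockless runtime):
+//!
+//! ```ignore
+//! use blockless_sdk::{BlessCrawl, Format, ScrapeOptions};
+//!
+//! // Scrape a page as markdown, overriding the output format explicitly.
+//! let options = ScrapeOptions::new().with_format(Format::Markdown);
+//! let response = BlessCrawl::default()
+//!     .scrape("https://example.com", Some(options))
+//!     .expect("Failed to scrape");
+//! println!("{}", response.data.content);
+//! ```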
+
+mod html_to_markdown;
+mod html_transform;
+
+use html_to_markdown::parse_markdown;
+pub use html_transform::{transform_html, HtmlTransformError, TransformHtmlOptions};
+use std::collections::HashMap;
+
+type Handle = u32;
+type ExitCode = u8;
+
+#[cfg(not(feature = "mock-ffi"))]
+#[link(wasm_import_module = "bless_crawl")]
+extern "C" {
+ /// Scrape webpage content and return as markdown
+ #[allow(clippy::too_many_arguments)]
+ fn scrape(
+ h: *mut Handle,
+ url_ptr: *const u8,
+ url_len: usize,
+ options_ptr: *const u8,
+ options_len: usize,
+ result_ptr: *mut u8,
+ result_len: usize,
+ bytes_written: *mut usize,
+ ) -> ExitCode;
+
+ /// Extract and return all discoverable links from webpage
+ #[allow(clippy::too_many_arguments)]
+ fn map(
+ h: *mut Handle,
+ url_ptr: *const u8,
+ url_len: usize,
+ options_ptr: *const u8,
+ options_len: usize,
+ result_ptr: *mut u8,
+ result_len: usize,
+ bytes_written: *mut usize,
+ ) -> ExitCode;
+
+ /// Recursively crawl website starting from given URL
+ #[allow(clippy::too_many_arguments)]
+ fn crawl(
+ h: *mut Handle,
+ url_ptr: *const u8,
+ url_len: usize,
+ options_ptr: *const u8,
+ options_len: usize,
+ result_ptr: *mut u8,
+ result_len: usize,
+ bytes_written: *mut usize,
+ ) -> ExitCode;
+
+ /// Close and cleanup a web scraper instance
+ fn close(h: Handle) -> ExitCode;
+}
+
+#[cfg(feature = "mock-ffi")]
+#[allow(unused_variables)]
+mod mock_ffi {
+ use super::{ExitCode, Handle};
+
+ #[allow(clippy::too_many_arguments)]
+ pub unsafe fn scrape(
+ h: *mut Handle,
+ _url_ptr: *const u8,
+ _url_len: usize,
+ _options_ptr: *const u8,
+ _options_len: usize,
+ result_ptr: *mut u8,
+ result_len: usize,
+ bytes_written: *mut usize,
+ ) -> ExitCode {
+ 1
+ }
+
+ #[allow(clippy::too_many_arguments)]
+ pub unsafe fn map(
+ h: *mut Handle,
+ _url_ptr: *const u8,
+ _url_len: usize,
+ _options_ptr: *const u8,
+ _options_len: usize,
+ result_ptr: *mut u8,
+ result_len: usize,
+ bytes_written: *mut usize,
+ ) -> ExitCode {
+ 1
+ }
+
+ #[allow(clippy::too_many_arguments)]
+ pub unsafe fn crawl(
+ h: *mut Handle,
+ _url_ptr: *const u8,
+ _url_len: usize,
+ _options_ptr: *const u8,
+ _options_len: usize,
+ result_ptr: *mut u8,
+ result_len: usize,
+ bytes_written: *mut usize,
+ ) -> ExitCode {
+ 1
+ }
+
+ pub unsafe fn close(_h: Handle) -> ExitCode {
+ 1
+ }
+}
+
+#[cfg(feature = "mock-ffi")]
+use mock_ffi::*;
+
+#[derive(Debug, Clone, PartialEq, serde::Serialize)]
+pub struct ScrapeOptions {
+ pub timeout: u32,
+ pub wait_time: u32,
+    pub include_tags: Option<Vec<String>>,
+    pub exclude_tags: Option<Vec<String>>,
+    pub only_main_content: bool,
+    pub format: Format,
+    pub viewport: Option<Viewport>,
+    pub user_agent: Option<String>,
+    pub headers: Option<HashMap<String, String>>,
+}
+
+impl Default for ScrapeOptions {
+ fn default() -> Self {
+ Self {
+ timeout: BlessCrawl::DEFAULT_TIMEOUT_MS,
+ wait_time: BlessCrawl::DEFAULT_WAIT_TIME_MS,
+ include_tags: None,
+ exclude_tags: None,
+ only_main_content: false,
+ format: Format::Markdown,
+ viewport: None,
+ user_agent: None,
+ headers: None,
+ }
+ }
+}
+
+#[derive(Debug, Clone, Default, PartialEq, serde::Serialize, serde::Deserialize)]
+pub enum Format {
+ #[default]
+ #[serde(rename = "markdown")]
+ Markdown,
+ #[serde(rename = "html")]
+ Html,
+ #[serde(rename = "json")]
+ Json,
+}
+
+impl std::str::FromStr for Format {
+ type Err = ();
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+ match s {
+ "markdown" => Ok(Format::Markdown),
+ "html" => Ok(Format::Html),
+ "json" => Ok(Format::Json),
+ _ => Err(()),
+ }
+ }
+}
+
+#[derive(Debug, Clone, Default, PartialEq, serde::Serialize)]
+pub struct Viewport {
+    pub width: Option<u32>,
+    pub height: Option<u32>,
+}
+
+#[derive(Debug, Clone, Default, PartialEq, serde::Serialize)]
+pub struct MapOptions {
+    pub link_types: Option<Vec<String>>,
+    pub base_url: Option<String>,
+    pub filter_extensions: Option<Vec<String>>,
+}
+
+#[derive(Debug, Clone, Default, PartialEq, serde::Serialize)]
+pub struct CrawlOptions {
+    pub limit: Option<u32>,
+    pub max_depth: Option<u8>,
+    pub exclude_paths: Option<Vec<String>>,
+    pub include_paths: Option<Vec<String>>,
+    pub follow_external: Option<bool>,
+    pub delay_between_requests: Option<u32>,
+    pub parallel_requests: Option<u32>,
+}
+
+#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
+pub struct PageMetadata {
+    pub title: Option<String>,
+    pub description: Option<String>,
+    pub url: String,
+    pub status_code: u16,
+    pub language: Option<String>,
+    pub keywords: Option<String>,
+    pub robots: Option<String>,
+    pub author: Option<String>,
+    pub creator: Option<String>,
+    pub publisher: Option<String>,
+    pub og_title: Option<String>,
+    pub og_description: Option<String>,
+    pub og_image: Option<String>,
+    pub og_url: Option<String>,
+    pub og_site_name: Option<String>,
+    pub og_type: Option<String>,
+    pub twitter_title: Option<String>,
+    pub twitter_description: Option<String>,
+    pub twitter_image: Option<String>,
+    pub twitter_card: Option<String>,
+    pub twitter_site: Option<String>,
+    pub twitter_creator: Option<String>,
+    pub favicon: Option<String>,
+    pub viewport: Option<String>,
+    pub referrer: Option<String>,
+    pub content_type: Option<String>,
+    pub scrape_id: Option<String>,
+    pub source_url: Option<String>,
+    pub proxy_used: Option<String>,
+}
+
+#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
+pub struct ScrapeData {
+ pub success: bool,
+ pub timestamp: u64,
+ pub format: Format,
+ pub content: String,
+ pub metadata: PageMetadata,
+}
+
+#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
+pub struct Response<T> {
+    pub success: bool,
+    pub error: Option<String>,
+    pub data: T,
+}
+
+#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
+pub struct LinkInfo {
+ pub url: String,
+ // TODO: use enum instead of string
+ pub link_type: String, // "internal", "external", "anchor"
+}
+
+#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
+pub struct MapData {
+ pub url: String,
+    pub links: Vec<LinkInfo>,
+ pub total_links: usize,
+ pub timestamp: u64,
+}
+
+#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
+pub struct CrawlError {
+ pub url: String,
+ pub error: String,
+ pub depth: u32,
+}
+
+#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
+pub struct CrawlData {
+ pub root_url: String,
+    pub pages: Vec<ScrapeData>,
+    pub link_map: Option<HashMap<String, Vec<String>>>, // page URL -> discovered links (assumed shape)
+ pub depth_reached: u8,
+ pub total_pages: usize,
+ pub errors: Vec,
+}
+
+impl ScrapeOptions {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+    pub fn with_include_tags(mut self, tags: Vec<String>) -> Self {
+ self.include_tags = Some(tags);
+ self
+ }
+
+    pub fn with_exclude_tags(mut self, tags: Vec<String>) -> Self {
+ self.exclude_tags = Some(tags);
+ self
+ }
+
+ pub fn with_format(mut self, format: Format) -> Self {
+ self.format = format;
+ self
+ }
+
+ pub fn with_viewport(mut self, width: u32, height: u32) -> Self {
+ self.viewport = Some(Viewport {
+ width: Some(width),
+ height: Some(height),
+ });
+ self
+ }
+
+ pub fn with_user_agent(mut self, user_agent: String) -> Self {
+ self.user_agent = Some(user_agent);
+ self
+ }
+
+    pub fn with_headers(mut self, headers: HashMap<String, String>) -> Self {
+ self.headers = Some(headers);
+ self
+ }
+}
+
+impl MapOptions {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+    pub fn with_link_types(mut self, link_types: Vec<String>) -> Self {
+ self.link_types = Some(link_types);
+ self
+ }
+
+ pub fn with_base_url(mut self, base_url: String) -> Self {
+ self.base_url = Some(base_url);
+ self
+ }
+
+    pub fn with_filter_extensions(mut self, extensions: Vec<String>) -> Self {
+ self.filter_extensions = Some(extensions);
+ self
+ }
+}
+
+impl CrawlOptions {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ pub fn with_limit(mut self, limit: u32) -> Self {
+ self.limit = Some(limit);
+ self
+ }
+
+ pub fn with_max_depth(mut self, max_depth: u8) -> Self {
+ self.max_depth = Some(max_depth);
+ self
+ }
+
+    pub fn with_exclude_paths(mut self, paths: Vec<String>) -> Self {
+ self.exclude_paths = Some(paths);
+ self
+ }
+
+    pub fn with_include_paths(mut self, paths: Vec<String>) -> Self {
+ self.include_paths = Some(paths);
+ self
+ }
+
+ pub fn with_follow_external(mut self, follow: bool) -> Self {
+ self.follow_external = Some(follow);
+ self
+ }
+
+ pub fn with_delay_between_requests(mut self, delay: u32) -> Self {
+ self.delay_between_requests = Some(delay);
+ self
+ }
+
+ pub fn with_parallel_requests(mut self, parallel: u32) -> Self {
+ self.parallel_requests = Some(parallel);
+ self
+ }
+}
+
+/// BlessCrawl client for distributed web scraping operations.
+#[derive(Debug, Clone, Default)]
+pub struct BlessCrawl {
+ inner: Handle,
+ config: ScrapeOptions,
+}
+
+impl BlessCrawl {
+ /// Default timeout in milliseconds (15 seconds)
+ pub const DEFAULT_TIMEOUT_MS: u32 = 15000;
+ /// Default wait time in milliseconds (3 seconds)
+ pub const DEFAULT_WAIT_TIME_MS: u32 = 3000;
+
+ /// Maximum timeout in milliseconds (2 minutes)
+ pub const MAX_TIMEOUT_MS: u32 = 120000;
+ /// Maximum wait time in milliseconds (20 seconds)
+ pub const MAX_WAIT_TIME_MS: u32 = 20000;
+
+    /// Maximum scrape result buffer size in bytes (2MB)
+    pub const MAX_SCRAPE_BUFFER_SIZE: usize = 2 * 1024 * 1024;
+
+    /// Maximum map result buffer size in bytes (1MB)
+    pub const MAX_MAP_BUFFER_SIZE: usize = 1024 * 1024;
+
+    /// Maximum crawl result buffer size in bytes (8MB)
+    pub const MAX_CRAWL_BUFFER_SIZE: usize = 8 * 1024 * 1024;
+
+ /// Creates a new BlessCrawl instance with the given configuration.
+    pub fn with_config(config: ScrapeOptions) -> Result<Self, WebScrapeErrorKind> {
+ let instance = Self { inner: 0, config };
+ instance.validate_config(&instance.config)?;
+ Ok(instance)
+ }
+
+ fn validate_config(&self, config: &ScrapeOptions) -> Result<(), WebScrapeErrorKind> {
+ if config.timeout > Self::MAX_TIMEOUT_MS {
+ return Err(WebScrapeErrorKind::InvalidTimeout);
+ }
+ if config.wait_time > Self::MAX_WAIT_TIME_MS {
+ return Err(WebScrapeErrorKind::InvalidWaitTime);
+ }
+ Ok(())
+ }
+
+ /// Returns a reference to the current configuration.
+ pub fn get_config(&self) -> &ScrapeOptions {
+ &self.config
+ }
+
+ pub fn handle(&self) -> Handle {
+ self.inner
+ }
+
+ /// Scrapes webpage content and returns it as markdown with metadata.
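+    ///
+    /// A short usage sketch (an `ignore` block; it needs the BLESS runtime to run):
+    ///
+    /// ```ignore
+    /// let response = BlessCrawl::default()
+    ///     .scrape("https://example.com", None)
+    ///     .expect("Failed to scrape");
+    /// println!("{}", response.data.content);
+    /// ```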
+ pub fn scrape(
+ &self,
+ url: &str,
+        options: Option<ScrapeOptions>,
+    ) -> Result<Response<ScrapeData>, WebScrapeErrorKind> {
+ // Use provided options or fall back to instance config
+ let config = if let Some(opts) = options {
+ self.validate_config(&opts)?;
+ opts
+ } else {
+ self.config.clone()
+ };
+
+ let options_json = serde_json::to_vec(&config).unwrap();
+
+ let mut handle = self.inner;
+ let mut result_buf = vec![0u8; Self::MAX_SCRAPE_BUFFER_SIZE];
+ let mut bytes_written: usize = 0;
+
+ let code = unsafe {
+ scrape(
+ &mut handle,
+ url.as_ptr(),
+ url.len(),
+ options_json.as_ptr(),
+ options_json.len(),
+ result_buf.as_mut_ptr(),
+ result_buf.len(),
+ &mut bytes_written,
+ )
+ };
+
+ if code != 0 {
+ return Err(code.into());
+ }
+ if bytes_written == 0 {
+ return Err(WebScrapeErrorKind::EmptyResponse);
+ }
+ if bytes_written > result_buf.len() {
+ return Err(WebScrapeErrorKind::MemoryError);
+ }
+
+ let result_bytes =
+ unsafe { std::slice::from_raw_parts(result_buf.as_ptr(), bytes_written) };
+
+ // deserialize the result to host ScrapeResponse
+        let mut scrape_response = serde_json::from_slice::<Response<ScrapeData>>(result_bytes)
+ .map_err(|e| {
+ eprintln!("error: {:?}", e);
+ WebScrapeErrorKind::ParseError
+ })?;
+
+ if let Some(error) = scrape_response.error {
+ return Err(WebScrapeErrorKind::RuntimeError(error));
+ }
+
+ // post-process html
+ scrape_response.data.content = transform_html(TransformHtmlOptions {
+ html: scrape_response.data.content,
+ url: scrape_response.data.metadata.url.clone(),
+ include_tags: config.include_tags.unwrap_or_default(),
+ exclude_tags: config.exclude_tags.unwrap_or_default(),
+ only_main_content: config.only_main_content,
+ })
+ .map_err(|e| {
+ eprintln!("error: {:?}", e);
+ WebScrapeErrorKind::TransformError
+ })?;
+
+ // if the format is markdown, set the data to the markdown of the html
+ match config.format {
+ Format::Markdown => {
+ scrape_response.data.content = parse_markdown(&scrape_response.data.content);
+ }
+ Format::Html => (), // no need to do anything
+ Format::Json => unimplemented!(),
+ }
+
+ // convert the host ScrapeResponse to the user ScrapeResponse
+ Ok(scrape_response)
+ }
+
+ /// Extracts all links from a webpage, categorized by type.
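+    ///
+    /// A short usage sketch (an `ignore` block; it needs the BLESS runtime to run):
+    ///
+    /// ```ignore
+    /// let options = MapOptions::new().with_link_types(vec!["internal".to_string()]);
+    /// let response = BlessCrawl::default()
+    ///     .map("https://example.com", Some(options))
+    ///     .expect("Failed to map");
+    /// println!("found {} links", response.data.total_links);
+    /// ```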
+ pub fn map(
+ &self,
+ url: &str,
+        options: Option<MapOptions>,
+    ) -> Result<Response<MapData>, WebScrapeErrorKind> {
+ let mut combined_options = serde_json::to_value(&self.config).unwrap();
+ if let Some(map_opts) = options {
+ combined_options["map_options"] = serde_json::to_value(map_opts).unwrap();
+ }
+ let options_json = serde_json::to_vec(&combined_options).unwrap();
+
+ let mut result_buf = vec![0u8; Self::MAX_MAP_BUFFER_SIZE];
+ let mut bytes_written: usize = 0;
+
+ let mut handle = self.inner;
+ let code = unsafe {
+ map(
+ &mut handle,
+ url.as_ptr(),
+ url.len(),
+ options_json.as_ptr(),
+ options_json.len(),
+ result_buf.as_mut_ptr(),
+ result_buf.len(),
+ &mut bytes_written,
+ )
+ };
+
+ if code != 0 {
+ return Err(code.into());
+ }
+
+ if bytes_written == 0 {
+ return Err(WebScrapeErrorKind::EmptyResponse);
+ }
+
+ if bytes_written > result_buf.len() {
+ return Err(WebScrapeErrorKind::MemoryError);
+ }
+
+ let result_bytes =
+ unsafe { std::slice::from_raw_parts(result_buf.as_ptr(), bytes_written) };
+
+ // deserialize the result to MapResponse
+ let map_response =
+            serde_json::from_slice::<Response<MapData>>(result_bytes).map_err(|e| {
+ eprintln!("error: {:?}", e);
+ WebScrapeErrorKind::ParseError
+ })?;
+
+ if let Some(error) = map_response.error {
+ return Err(WebScrapeErrorKind::RuntimeError(error));
+ }
+
+ Ok(map_response)
+ }
+
+ /// Recursively crawls a website with configurable depth and filtering.
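+    ///
+    /// A short usage sketch (an `ignore` block; it needs the BLESS runtime to run):
+    ///
+    /// ```ignore
+    /// let options = CrawlOptions::new().with_max_depth(2).with_limit(10);
+    /// let response = BlessCrawl::default()
+    ///     .crawl("https://example.com", Some(options))
+    ///     .expect("Failed to crawl");
+    /// println!("crawled {} pages", response.data.total_pages);
+    /// ```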
+ pub fn crawl(
+ &self,
+ url: &str,
+        options: Option<CrawlOptions>,
+    ) -> Result<Response<CrawlData>, WebScrapeErrorKind> {
+ let mut combined_options = serde_json::to_value(&self.config).unwrap();
+ if let Some(crawl_opts) = options {
+ combined_options["crawl_options"] = serde_json::to_value(crawl_opts).unwrap();
+ }
+ let options_json = serde_json::to_vec(&combined_options).unwrap();
+
+ let mut result_buf = vec![0u8; Self::MAX_CRAWL_BUFFER_SIZE];
+ let mut bytes_written: usize = 0;
+
+ let mut handle = self.inner;
+ let code = unsafe {
+ crawl(
+ &mut handle,
+ url.as_ptr(),
+ url.len(),
+ options_json.as_ptr(),
+ options_json.len(),
+ result_buf.as_mut_ptr(),
+ result_buf.len(),
+ &mut bytes_written,
+ )
+ };
+
+ if code != 0 {
+ return Err(code.into());
+ }
+
+ if bytes_written == 0 {
+ return Err(WebScrapeErrorKind::EmptyResponse);
+ }
+
+ if bytes_written > result_buf.len() {
+ return Err(WebScrapeErrorKind::MemoryError);
+ }
+
+ let result_bytes =
+ unsafe { std::slice::from_raw_parts(result_buf.as_ptr(), bytes_written) };
+
+ // deserialize the result to CrawlResponse
+        let mut host_crawl_response = serde_json::from_slice::<Response<CrawlData>>(result_bytes)
+            .map_err(|e| {
+                eprintln!("error: {:?}", e);
+                WebScrapeErrorKind::ParseError
+            })?;
+
+ if let Some(error) = host_crawl_response.error {
+ return Err(WebScrapeErrorKind::RuntimeError(error));
+ }
+
+ // post-process html
+ for page in host_crawl_response.data.pages.iter_mut() {
+ page.content = transform_html(TransformHtmlOptions {
+ html: page.content.clone(),
+ url: page.metadata.url.clone(),
+ include_tags: self.config.include_tags.clone().unwrap_or_default(),
+ exclude_tags: self.config.exclude_tags.clone().unwrap_or_default(),
+ only_main_content: self.config.only_main_content,
+ })
+ .map_err(|e| {
+ eprintln!("error: {:?}", e);
+ WebScrapeErrorKind::TransformError
+ })?;
+
+ // if the format is markdown, set the content to the markdown of the html
+ match self.config.format {
+ Format::Markdown => {
+ page.content = parse_markdown(&page.content);
+ }
+ Format::Html => (), // no need to do anything
+ Format::Json => unimplemented!(),
+ }
+ }
+
+ // convert the host CrawlResponse to the user CrawlResponse
+ Ok(host_crawl_response)
+ }
+}
+
+impl Drop for BlessCrawl {
+ fn drop(&mut self) {
+ // if the handle is 0, it means the instance was never initialized on the host
+ if self.inner == 0 {
+ return;
+ }
+ let code = unsafe { close(self.inner) };
+ if code != 0 {
+ eprintln!("Error closing web scraper: {}", code);
+ }
+ }
+}
+
+#[derive(Debug)]
+pub enum WebScrapeErrorKind {
+ InvalidUrl,
+ Timeout,
+ NetworkError,
+ RenderingError,
+ MemoryError,
+ DepthExceeded,
+ RateLimited,
+ TransformError,
+ Utf8Error,
+ ParseError,
+ ScrapeFailed,
+ MapFailed,
+ CrawlFailed,
+ EmptyResponse,
+ InvalidTimeout,
+ InvalidWaitTime,
+ RuntimeError(String),
+}
+
+impl From<u8> for WebScrapeErrorKind {
+ fn from(code: u8) -> Self {
+ match code {
+ 1 => WebScrapeErrorKind::InvalidUrl,
+ 2 => WebScrapeErrorKind::Timeout,
+ 3 => WebScrapeErrorKind::NetworkError,
+ 4 => WebScrapeErrorKind::RenderingError,
+ 5 => WebScrapeErrorKind::MemoryError,
+ 6 => WebScrapeErrorKind::DepthExceeded,
+ 7 => WebScrapeErrorKind::RateLimited,
+ 8 => WebScrapeErrorKind::TransformError,
+            9 => WebScrapeErrorKind::InvalidTimeout,
+            10 => WebScrapeErrorKind::InvalidWaitTime,
+ _ => WebScrapeErrorKind::RuntimeError(String::from("Unknown error")),
+ }
+ }
+}
+
+impl std::fmt::Display for WebScrapeErrorKind {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self {
+ WebScrapeErrorKind::InvalidUrl => write!(f, "Invalid URL provided"),
+ WebScrapeErrorKind::Timeout => write!(f, "Request timeout"),
+ WebScrapeErrorKind::NetworkError => write!(f, "Network error"),
+ WebScrapeErrorKind::RenderingError => write!(f, "Page rendering error"),
+ WebScrapeErrorKind::MemoryError => write!(f, "Memory allocation error"),
+ WebScrapeErrorKind::DepthExceeded => write!(f, "Maximum crawl depth exceeded"),
+ WebScrapeErrorKind::RateLimited => write!(f, "Rate limited"),
+ WebScrapeErrorKind::TransformError => write!(f, "Transform error"),
+ WebScrapeErrorKind::Utf8Error => write!(f, "UTF-8 conversion error"),
+ WebScrapeErrorKind::ParseError => write!(f, "JSON parse error"),
+ WebScrapeErrorKind::ScrapeFailed => write!(f, "Scrape operation failed"),
+ WebScrapeErrorKind::MapFailed => write!(f, "Map operation failed"),
+ WebScrapeErrorKind::CrawlFailed => write!(f, "Crawl operation failed"),
+ WebScrapeErrorKind::EmptyResponse => write!(f, "Empty response from host"),
+ WebScrapeErrorKind::InvalidTimeout => {
+ write!(f, "Timeout exceeds maximum allowed (120s)")
+ }
+ WebScrapeErrorKind::InvalidWaitTime => {
+ write!(f, "Wait time exceeds maximum allowed (20s)")
+ }
+ WebScrapeErrorKind::RuntimeError(error) => write!(f, "Runtime error: {}", error),
+ }
+ }
+}
+
+impl std::error::Error for WebScrapeErrorKind {}
diff --git a/src/lib.rs b/src/lib.rs
index d67e81e..b60c611 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,4 @@
+mod bless_crawl;
mod cgi;
mod error;
mod http;
@@ -5,6 +6,7 @@ mod llm;
mod memory;
mod socket;
+pub use bless_crawl::*;
pub use cgi::*;
pub use error::*;
pub use http::*;