From 3df9ce85e00aeb853df8040d9cab27516cf5f6fc Mon Sep 17 00:00:00 2001 From: z Date: Mon, 30 Jun 2025 15:11:18 +1200 Subject: [PATCH] Skip serializing unset optional fields in ScrapeOptions, Viewport, MapOptions, CrawlOptions, PageMetadata, Response, and CrawlData structs - Added `#[serde(skip_serializing_if = "Option::is_none")]` to the existing `Option` fields of these structs so that fields set to `None` are omitted from the serialized output. - No fields are added or removed by this change; only the serialization behavior of the existing optional fields is affected. --- src/bless_crawl/mod.rs | 46 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/src/bless_crawl/mod.rs b/src/bless_crawl/mod.rs index 251ee6f..2a6e542 100644 --- a/src/bless_crawl/mod.rs +++ b/src/bless_crawl/mod.rs @@ -75,12 +75,17 @@ use mock_ffi::*; pub struct ScrapeOptions { pub timeout: u32, pub wait_time: u32, + #[serde(skip_serializing_if = "Option::is_none")] pub include_tags: Option>, + #[serde(skip_serializing_if = "Option::is_none")] pub exclude_tags: Option>, pub only_main_content: bool, pub format: Format, + #[serde(skip_serializing_if = "Option::is_none")] pub viewport: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub user_agent: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub headers: Option>, } @@ -125,58 +130,97 @@ impl std::str::FromStr for Format { #[derive(Debug, Clone, Default, PartialEq, serde::Serialize)] pub struct Viewport { + #[serde(skip_serializing_if = "Option::is_none")] pub width: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub height: Option, } #[derive(Debug, Clone, Default, PartialEq, serde::Serialize)] pub struct MapOptions { + #[serde(skip_serializing_if = "Option::is_none")] pub link_types: Option>, + #[serde(skip_serializing_if = "Option::is_none")] pub base_url: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub filter_extensions: Option>, } #[derive(Debug, Clone, Default, PartialEq, serde::Serialize)] pub struct CrawlOptions { + 
#[serde(skip_serializing_if = "Option::is_none")] pub limit: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub max_depth: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub exclude_paths: Option>, + #[serde(skip_serializing_if = "Option::is_none")] pub include_paths: Option>, + #[serde(skip_serializing_if = "Option::is_none")] pub follow_external: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub delay_between_requests: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub parallel_requests: Option, } #[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)] pub struct PageMetadata { + #[serde(skip_serializing_if = "Option::is_none")] pub title: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub description: Option, pub url: String, pub status_code: u16, + #[serde(skip_serializing_if = "Option::is_none")] pub language: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub keywords: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub robots: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub author: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub creator: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub publisher: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub og_title: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub og_description: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub og_image: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub og_url: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub og_site_name: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub og_type: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub twitter_title: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub twitter_description: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub twitter_image: Option, + 
#[serde(skip_serializing_if = "Option::is_none")] pub twitter_card: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub twitter_site: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub twitter_creator: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub favicon: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub viewport: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub referrer: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub content_type: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub scrape_id: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub source_url: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub proxy_used: Option, } @@ -192,6 +236,7 @@ pub struct ScrapeData { #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct Response { pub success: bool, + #[serde(skip_serializing_if = "Option::is_none")] pub error: Option, pub data: T, } @@ -222,6 +267,7 @@ pub struct CrawlError { pub struct CrawlData { pub root_url: String, pub pages: Vec, + #[serde(skip_serializing_if = "Option::is_none")] pub link_map: Option, pub depth_reached: u8, pub total_pages: usize,