🚀 feat: integrate RAGAS evaluation framework #47
Merged

31 commits:
e82c32f  feat: partial ragas intergation  (kevinagyeman)
6a651af  Merge remote-tracking branch 'origin/develop' into ragas-integration-…  (kevinagyeman)
f7a53ca  feat: ragas integration (partial)  (kevinagyeman)
5ea1e61  feat: partial ragas integration wip  (kevinagyeman)
a41d158  edit: add .worktrees/ to gitignore  (nicofretti)
c814c1b  fix: theme + storybook  (nicofretti)
a9fe6fd  fix: bug config pipeline  (nicofretti)
5562d01  edit: improve generator validator  (nicofretti)
20d7202  fix: stop job handling  (nicofretti)
20c8d1e  Merge remote-tracking branch 'origin/feat/replace-modals' into ragas-…  (kevinagyeman)
918f89e  feat: integration of answer_relevancy and context_precision metrics  (kevinagyeman)
783eadc  feat: aggregate multiple ragas metrics  (kevinagyeman)
46452fc  feat: ragas integration completion  (kevinagyeman)
157af89  Merge remote-tracking branch 'origin/develop' into ragas-integration-…  (kevinagyeman)
8caaf62  fix: update blocks to use BlockExecutionContext pattern and add depen…  (kevinagyeman)
ceda594  fix: address copilot code review feedback  (kevinagyeman)
d0f3601  wip: fixing the ragas block + field mapper  (nicofretti)
06d553c  wip: fixing ragas block  (nicofretti)
1f03105  fix: langfuse error  (nicofretti)
da1196b  fix: integration ragas  (nicofretti)
1f10c30  fix: renaming + review  (nicofretti)
57ad2c0  fix: format  (nicofretti)
350374c  fix: missing import + doc  (nicofretti)
fb5221f  fix: pre-merge + docs  (nicofretti)
52a1645  fix: ui view  (nicofretti)
627daf2  edit: changelog  (nicofretti)
2249e1a  fix: pipeline errors  (nicofretti)
8acc9c0  fix: pipeline  (nicofretti)
7e0b20c  edit: import on ragas  (nicofretti)
735107a  fix: debug_pipeline.py  (nicofretti)
b9c3339  Merge branch 'develop' into ragas-integration-block  (nicofretti)
# RAGAS Evaluation Guide

## Overview

RAGAS (Retrieval Augmented Generation Assessment) is a framework for evaluating the quality of RAG-generated answers. The **RagasMetrics** block evaluates a single QA pair against multiple quality metrics.
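The block wraps the `ragas` library. For reference, here is a minimal standalone sketch of the same evaluation; it assumes the ragas 0.1-style column names (newer releases rename them) and an LLM plus embedding backend already configured in your environment:

```python
# Standalone sketch of the evaluation the RagasMetrics block performs.
# Assumes ragas 0.1-style column names; newer releases rename these columns.
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_precision,
    context_recall,
)

data = Dataset.from_dict({
    "question": ["What is the capital of France?"],
    "answer": ["Paris is the capital of France."],
    "contexts": [["Paris is the capital and largest city of France."]],
    "ground_truth": ["Paris is the capital of France."],
})

# evaluate() calls the configured LLM (and, for answer_relevancy, the
# embedding model) once per sample and metric.
result = evaluate(
    data,
    metrics=[answer_relevancy, faithfulness, context_precision, context_recall],
)
print(result)
```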
## Metrics

### 1. Answer Relevancy

**What it measures**: How relevant the answer is to the question.

**Range**: 0.0 - 1.0 (higher is better)

**Requires**:
- question
- answer
- embeddings (configured via embedding model)

**Example**:
- Question: "What is the capital of France?"
- Answer: "Paris is the capital of France" -> High score (0.9+)
- Answer: "France is a European country" -> Low score (0.3-)
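Under the hood, RAGAS computes this by having the LLM regenerate candidate questions from the answer and measuring their embedding similarity to the original question, which is why this metric needs an embedding model. A toy sketch of the similarity step, with made-up vectors standing in for real embeddings:

```python
import numpy as np

def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Cosine similarity between two embedding vectors."""
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

# Made-up vectors standing in for the embedding of the original question
# and for embeddings of questions the LLM regenerated from the answer.
original_question = np.array([0.9, 0.1, 0.2])
regenerated = [np.array([0.88, 0.12, 0.25]), np.array([0.85, 0.15, 0.22])]

# Answer relevancy averages the similarities: close paraphrases score near 1.0.
score = np.mean([cosine_similarity(original_question, q) for q in regenerated])
print(round(float(score), 3))
```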
### 2. Faithfulness

**What it measures**: Whether the answer is factually consistent with the provided context.

**Range**: 0.0 - 1.0 (higher is better)

**Requires**:
- question
- answer
- contexts

**Example**:
- Context: "The Eiffel Tower is 330 meters tall"
- Answer: "The Eiffel Tower is 330 meters tall" -> High score (0.9+)
- Answer: "The Eiffel Tower is 500 meters tall" -> Low score (0.3-)

### 3. Context Precision

**What it measures**: Whether the relevant context chunks appear earlier in the context list.

**Range**: 0.0 - 1.0 (higher is better)

**Requires**:
- question
- contexts
- ground_truth

**Example**:
- If the most relevant context appears first in the list -> High score
- If relevant context is buried at the end -> Low score
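The scoring behind this is essentially average precision over the ranked context list, as in the sketch below (a simplified model in which an evaluator LLM has already judged each chunk relevant or not):

```python
def context_precision(relevance: list[bool]) -> float:
    """Average precision@k over the positions that hold a relevant chunk;
    the relevance judgments themselves come from the evaluator LLM."""
    hits, total = 0, 0.0
    for k, relevant in enumerate(relevance, start=1):
        if relevant:
            hits += 1
            total += hits / k
    return total / max(hits, 1)

print(context_precision([True, False, False]))  # relevant chunk first -> 1.0
print(context_precision([False, False, True]))  # buried at the end -> ~0.33
```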
### 4. Context Recall

**What it measures**: Whether all information needed to answer the question is present in the contexts.

**Range**: 0.0 - 1.0 (higher is better)

**Requires**:
- question
- contexts
- ground_truth

**Example**:
- Ground truth: "Paris is the capital of France, located on the Seine river"
- Context includes both facts -> High score (1.0)
- Context only includes capital fact -> Lower score (0.5)
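Both context recall and faithfulness boil down to a ratio of supported claims; the 0.5 in the example above is one supported ground-truth claim out of two. A back-of-the-envelope sketch (in the real metric, the claim splitting and support checks are done by the evaluator LLM):

```python
def claim_ratio(claims: list[str], supported: set[str]) -> float:
    """Share of claims that are supported by the retrieved contexts."""
    return sum(claim in supported for claim in claims) / len(claims)

# The context recall example above: two ground-truth claims, one of which
# is found in the contexts -> 1/2 = 0.5.
gt_claims = ["Paris is the capital of France", "Paris is located on the Seine"]
print(claim_ratio(gt_claims, supported={"Paris is the capital of France"}))
```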
## Configuration

### Field References

The block uses field references to locate data in the pipeline state:
- **question_field**: Field containing the question
- **answer_field**: Field containing the answer
- **contexts_field**: Field containing the contexts (a list of strings)
- **ground_truth_field**: Field containing the expected answer

These are dropdowns populated from the available pipeline fields. You can use the **FieldMapper** block to rename or create fields as needed (e.g. to extract fields from nested structures).
### Selecting Metrics

Use the **metrics** multi-select to choose which metrics to compute:
- Check all metrics you want to evaluate
- Uncheck metrics you don't need
- Note: `answer_relevancy` requires an embedding model
### Score Threshold

The **score_threshold** field sets the minimum value each metric must reach to count as passing. The block outputs a boolean `passed` indicating whether all selected metrics meet or exceed this threshold, as sketched below.
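A one-line sketch of that pass/fail rule (the names are illustrative, not the block's actual internals):

```python
def all_passed(scores: dict[str, float], score_threshold: float) -> bool:
    """Every selected metric must meet or exceed the threshold."""
    return all(value >= score_threshold for value in scores.values())

print(all_passed({"faithfulness": 0.88, "context_recall": 0.85}, 0.8))  # True
print(all_passed({"faithfulness": 0.88, "context_recall": 0.75}, 0.8))  # False
```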
### Model Configuration

- **model**: LLM model used for evaluation (leave empty for the pipeline default)
- **embedding_model**: Embedding model for `answer_relevancy` (leave empty for the default)
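Putting these options together, a block configuration could look like the sketch below. The parameter names are the ones documented above, but the dict shape is illustrative rather than the block's exact schema:

```python
# Illustrative RagasMetrics configuration; the field names match the
# documentation above, but the exact config schema may differ.
ragas_block_config = {
    "question_field": "question",
    "answer_field": "answer",
    "contexts_field": "retrieved_contexts",
    "ground_truth_field": "expected_answer",
    "metrics": [
        "answer_relevancy",
        "faithfulness",
        "context_precision",
        "context_recall",
    ],
    "score_threshold": 0.8,   # minimum passing value per metric
    "model": None,            # empty -> pipeline default LLM
    "embedding_model": None,  # empty -> default embedding model
}
```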
## Output Format

The block outputs a single `ragas_scores` object:

```json
{
  ...
  "ragas_scores": {
    "answer_relevancy": 0.92,
    "faithfulness": 0.88,
    "context_precision": 0.95,
    "context_recall": 0.85,
    "passed": true
  }
}
```
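A downstream block or script can then gate on `passed`, or inspect the individual scores. A hypothetical example, reusing the 0.8 threshold from the configuration sketch above:

```python
# Hypothetical downstream check; `state` stands in for the pipeline state
# after the RagasMetrics block has run, using the example values above.
state = {
    "ragas_scores": {
        "answer_relevancy": 0.92,
        "faithfulness": 0.88,
        "context_precision": 0.95,
        "context_recall": 0.85,
        "passed": True,
    }
}

scores = state["ragas_scores"]
if scores["passed"]:
    print("all selected metrics cleared the threshold")
else:
    failing = [m for m, v in scores.items() if m != "passed" and v < 0.8]
    print(f"failed metrics: {failing}")
```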