From 799d0e3bccb4e092b4cd05a2488e6cf658b5106b Mon Sep 17 00:00:00 2001
From: Pepijn <pepijn@huggingface.co>
Date: Tue, 2 Jun 2026 15:34:34 +0200
Subject: [PATCH] annotate: stitch subtasks to full-episode coverage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The verify pass prunes subtasks, which could leave the first subtask
starting after t0 or leave gaps between spans — so the subtask timeline
no longer tiled the episode and frames fell through with no active
subtask label.

New deterministic post-step (no VLM call), default on via
PlanConfig.subtask_full_coverage:
  * first subtask start pulled back to the episode's first frame t0
    (idle / approach before the first labelled action folds into it)
  * each subtask end snapped to the next subtask start (gaps closed)
  * last subtask end extended to the last frame t_last

Runs after segment + verify in _generate_subtasks. Spans are sorted by
start; starts other than the first are left as the VLM/verify produced
them (already frame-snapped + distinct), so the cover is contiguous and
non-overlapping. If the verify pass prunes every span, an empty list is
returned and nothing is stitched. Disable with
--plan.subtask_full_coverage=false if a consumer wants sparse subtasks.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../annotations/steerable_pipeline/config.py  | 10 +++++
 .../modules/plan_subtasks_memory.py           | 43 +++++++++++++++++++
 2 files changed, 53 insertions(+)

diff --git a/src/lerobot/annotations/steerable_pipeline/config.py b/src/lerobot/annotations/steerable_pipeline/config.py
index 18867f701..0389363b8 100644
--- a/src/lerobot/annotations/steerable_pipeline/config.py
+++ b/src/lerobot/annotations/steerable_pipeline/config.py
@@ -73,6 +73,16 @@ class PlanConfig:
     # nothing). +1 VLM call/ep.
     subtask_verify: bool = True
 
+    # ``subtask_full_coverage``: deterministic post-step (no VLM call)
+    # that stitches the surviving subtask spans into a contiguous cover
+    # of the whole episode — first subtask pulled back to t0, each span's
+    # end snapped to the next span's start, last span extended to t_last.
+    # Without it the verify pass (which prunes spans) can leave the
+    # subtask timeline starting late or full of gaps, so frames fall
+    # through with no active subtask. On by default; disable only if a
+    # downstream consumer genuinely wants sparse (non-tiling) subtasks.
+    subtask_full_coverage: bool = True
+
     # When True (and backend supports it, e.g. ``openai``), the ``plan``
     # module sends a ``video_url`` block pointing at a per-episode mp4
     # subclip and lets the server sample frames at ``use_video_url_fps``.
diff --git a/src/lerobot/annotations/steerable_pipeline/modules/plan_subtasks_memory.py b/src/lerobot/annotations/steerable_pipeline/modules/plan_subtasks_memory.py
index 1ba9b142b..b117c5657 100644
--- a/src/lerobot/annotations/steerable_pipeline/modules/plan_subtasks_memory.py
+++ b/src/lerobot/annotations/steerable_pipeline/modules/plan_subtasks_memory.py
@@ -571,9 +571,52 @@ class PlanSubtasksMemoryModule:
         # ---- Pass 3 (optional): verification / pruning ---------------
         if getattr(self.config, "subtask_verify", False):
             cleaned = self._verify_subtasks(record, effective_task, cleaned)
+            if not cleaned:
+                return []
+
+        # ---- Full-episode coverage stitch ----------------------------
+        # The VLM (especially after the verify pass prunes spans) can
+        # leave the first subtask starting after t0 or leave gaps between
+        # spans, so the subtask timeline no longer tiles the whole
+        # episode and frames fall through with no active subtask. Stitch
+        # the surviving spans into a contiguous cover of [t0, t_last].
+        if getattr(self.config, "subtask_full_coverage", True):
+            cleaned = self._stitch_full_coverage(cleaned, record)
 
         return cleaned
 
+    def _stitch_full_coverage(
+        self, spans: list[dict[str, Any]], record: EpisodeRecord
+    ) -> list[dict[str, Any]]:
+        """Rewrite span boundaries so the subtasks tile the whole episode.
+
+        Spans are sorted by ``start``; the earliest span's ``start`` is then
+        set to ``t0`` (first entry of ``record.frame_timestamps``), each
+        span's ``end`` is set to the next span's ``start``, and the final
+        span's ``end`` is set to ``t_last`` (last entry). Any span left with
+        ``end < start`` is clamped to zero length (``end = start``).
+
+        Args: ``spans`` — dicts with a ``start`` key (``end`` is overwritten);
+        the dicts are MUTATED in place. ``record`` supplies the timestamps.
+
+        Returns: a new list sorted by ``start`` that holds the same dict
+        objects — or ``spans`` itself, unchanged, when ``spans`` or
+        ``record.frame_timestamps`` is empty. No VLM call is made.
+        """
+        if not spans or not record.frame_timestamps:
+            return spans
+        t0 = float(record.frame_timestamps[0])
+        t_last = float(record.frame_timestamps[-1])
+        spans = sorted(spans, key=lambda s: float(s["start"]))  # new list, same dicts
+        spans[0]["start"] = t0  # earliest span absorbs any lead-in frames
+        for i in range(len(spans) - 1):
+            spans[i]["end"] = float(spans[i + 1]["start"])
+        spans[-1]["end"] = t_last  # last span runs to the final frame
+        for s in spans:
+            if float(s["end"]) < float(s["start"]):  # e.g. a start past t_last
+                s["end"] = float(s["start"])
+        return spans
+
     def _clean_spans(
         self, spans: Any, record: EpisodeRecord
     ) -> list[dict[str, Any]]: