From 1aec7ded1669eacd9720132776863bfea87d10fb Mon Sep 17 00:00:00 2001
From: rentainhe <596106517@qq.com>
Date: Thu, 31 Oct 2024 15:50:14 +0800
Subject: [PATCH] support box threshold in GD 1.5 demos

---
 README.md                                               | 2 +-
 grounded_sam2_gd1.5_demo.py                             | 3 +++
 grounded_sam2_tracking_demo_custom_video_input_gd1.5.py | 2 ++
 grounded_sam2_tracking_demo_with_gd1.5.py               | 1 +
 4 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 20438f3..af0111b 100644
--- a/README.md
+++ b/README.md
@@ -130,7 +130,7 @@ We've already released our most capable open-set detection model [Grounding DINO
 Install the latest DDS cloudapi:
 
 ```bash
-pip install dds-cloudapi-sdk
+pip install dds-cloudapi-sdk --upgrade
 ```
 
 Apply your API token from our official website here: [request API token](https://deepdataspace.com/request_api).
diff --git a/grounded_sam2_gd1.5_demo.py b/grounded_sam2_gd1.5_demo.py
index f1110cd..ff0c619 100644
--- a/grounded_sam2_gd1.5_demo.py
+++ b/grounded_sam2_gd1.5_demo.py
@@ -28,6 +28,7 @@ IMG_PATH = "notebooks/images/cars.jpg"
 SAM2_CHECKPOINT = "./checkpoints/sam2.1_hiera_large.pt"
 SAM2_MODEL_CONFIG = "configs/sam2.1/sam2.1_hiera_l.yaml"
 GROUNDING_MODEL = DetectionModel.GDino1_5_Pro # DetectionModel.GDino1_6_Pro
+BOX_THRESHOLD = 0.2  # box confidence threshold, forwarded as bbox_threshold to the detection task
 WITH_SLICE_INFERENCE = False
 SLICE_WH = (480, 480)
 OVERLAP_RATIO = (0.2, 0.2)
@@ -69,6 +70,7 @@ if WITH_SLICE_INFERENCE:
             prompts=[TextPrompt(text=TEXT_PROMPT)],
             targets=[DetectionTarget.BBox],  # detect bbox
             model=GROUNDING_MODEL,  # detect with GroundingDino-1.5-Pro model
+            bbox_threshold=BOX_THRESHOLD,  # box confidence threshold
         )
         client.run_task(task)
         result = task.result
@@ -110,6 +112,7 @@ else:
         prompts=[TextPrompt(text=TEXT_PROMPT)],
         targets=[DetectionTarget.BBox],  # detect bbox
         model=GROUNDING_MODEL,  # detect with GroundingDINO-1.5-Pro model
+        bbox_threshold=BOX_THRESHOLD,  # box confidence threshold
     )
 
     client.run_task(task)
diff --git a/grounded_sam2_tracking_demo_custom_video_input_gd1.5.py b/grounded_sam2_tracking_demo_custom_video_input_gd1.5.py
index 8f712f8..2c32ed3 100644
--- a/grounded_sam2_tracking_demo_custom_video_input_gd1.5.py
+++ b/grounded_sam2_tracking_demo_custom_video_input_gd1.5.py
@@ -30,6 +30,7 @@ SOURCE_VIDEO_FRAME_DIR = "./custom_video_frames"
 SAVE_TRACKING_RESULTS_DIR = "./tracking_results"
 API_TOKEN_FOR_GD1_5 = "Your API token"
 PROMPT_TYPE_FOR_VIDEO = "box" # choose from ["point", "box", "mask"]
+BOX_THRESHOLD = 0.2  # box confidence threshold, forwarded as bbox_threshold to DetectionTask
 
 """
 Step 1: Environment settings and model initialization for SAM 2
@@ -108,6 +109,7 @@ task = DetectionTask(
     prompts=[TextPrompt(text=TEXT_PROMPT)],
     targets=[DetectionTarget.BBox],  # detect bbox
     model=DetectionModel.GDino1_6_Pro,  # detect with GroundingDino-1.5-Pro model
+    bbox_threshold=BOX_THRESHOLD,  # box confidence threshold
 )
 
 client.run_task(task)
diff --git a/grounded_sam2_tracking_demo_with_gd1.5.py b/grounded_sam2_tracking_demo_with_gd1.5.py
index 7d58875..48ceb8c 100644
--- a/grounded_sam2_tracking_demo_with_gd1.5.py
+++ b/grounded_sam2_tracking_demo_with_gd1.5.py
@@ -80,6 +80,7 @@ task = DetectionTask(
     prompts=[TextPrompt(text="children. pillow")],
     targets=[DetectionTarget.BBox],  # detect bbox
     model=DetectionModel.GDino1_5_Pro,  # detect with GroundingDino-1.5-Pro model
+    bbox_threshold=0.2,  # box confidence threshold
 )
 
 client.run_task(task)