diff --git a/README.md b/README.md index 3cb30ff..161a48e 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ Grounded SAM 2 does not introduce significant methodological changes compared to ## Latest updates -- `2024/12/02`: Support **DINO-X SAM 2 Demos** (including object segmentation and tracking), please install the latest version of `dds-cloudapi-sdk` and refer to [Grounded SAM 2 (with DINO-X)](#grounded-sam-2-image-demo-with-dino-x) and [Grounded SAM 2 Video (with DINO-X)](#grounded-sam-2-video-object-tracking-demo-with-custom-video-input-with-dino-x) for more details. +- `2024/12/02`: Support **DINO-X with SAM 2** demos (including object segmentation and tracking). Please install `dds-cloudapi-sdk==0.3.3` (the latest version at the time of this update) and refer to [Grounded SAM 2 (with DINO-X)](#grounded-sam-2-image-demo-with-dino-x) and [Grounded SAM 2 Video (with DINO-X)](#grounded-sam-2-video-object-tracking-demo-with-custom-video-input-with-dino-x) for more details. - `2024/10/24`: Support [SAHI (Slicing Aided Hyper Inference)](https://docs.ultralytics.com/guides/sahi-tiled-inference/) on Grounded SAM 2 (with Grounding DINO 1.5) which may be helpful for inferencing high resolution image with dense small objects (e.g. **4K** images). - `2024/10/10`: Support `SAM-2.1` models, if you want to use `SAM 2.1` model, you need to update to the latest code and reinstall SAM 2 follow [SAM 2.1 Installation](https://github.com/facebookresearch/sam2?tab=readme-ov-file#latest-updates). - `2024/08/31`: Support `dump json results` in Grounded SAM 2 Image Demos (with Grounding DINO). 
diff --git a/grounded_sam2_dinox_demo.py b/grounded_sam2_dinox_demo.py index 6d2e6f3..e1f748d 100644 --- a/grounded_sam2_dinox_demo.py +++ b/grounded_sam2_dinox_demo.py @@ -2,6 +2,7 @@ from dds_cloudapi_sdk import Config from dds_cloudapi_sdk import Client from dds_cloudapi_sdk.tasks.dinox import DinoxTask +from dds_cloudapi_sdk.tasks.types import DetectionTarget from dds_cloudapi_sdk import TextPrompt import os @@ -64,7 +65,9 @@ if WITH_SLICE_INFERENCE: image_url = client.upload_file(temp_filename) task = DinoxTask( image_url=image_url, - prompts=[TextPrompt(text=TEXT_PROMPT)] + prompts=[TextPrompt(text=TEXT_PROMPT)], + bbox_threshold=0.25, + targets=[DetectionTarget.BBox], ) client.run_task(task) result = task.result @@ -103,7 +106,9 @@ else: task = DinoxTask( image_url=image_url, - prompts=[TextPrompt(text=TEXT_PROMPT)] + prompts=[TextPrompt(text=TEXT_PROMPT)], + bbox_threshold=0.25, + targets=[DetectionTarget.BBox], ) client.run_task(task) diff --git a/grounded_sam2_tracking_demo_custom_video_input_dinox.py b/grounded_sam2_tracking_demo_custom_video_input_dinox.py index 7f8a971..e715218 100644 --- a/grounded_sam2_tracking_demo_custom_video_input_dinox.py +++ b/grounded_sam2_tracking_demo_custom_video_input_dinox.py @@ -2,6 +2,7 @@ from dds_cloudapi_sdk import Config from dds_cloudapi_sdk import Client from dds_cloudapi_sdk.tasks.dinox import DinoxTask +from dds_cloudapi_sdk.tasks.types import DetectionTarget from dds_cloudapi_sdk import TextPrompt import os @@ -26,7 +27,7 @@ TEXT_PROMPT = "hippopotamus." 
OUTPUT_VIDEO_PATH = "./hippopotamus_tracking_demo.mp4" SOURCE_VIDEO_FRAME_DIR = "./custom_video_frames" SAVE_TRACKING_RESULTS_DIR = "./tracking_results" -API_TOKEN_FOR_GD1_5 = "Your API token" +API_TOKEN_FOR_DINOX = "Your API token" PROMPT_TYPE_FOR_VIDEO = "box" # choose from ["point", "box", "mask"] BOX_THRESHOLD = 0.2 @@ -92,7 +93,7 @@ img_path = os.path.join(SOURCE_VIDEO_FRAME_DIR, frame_names[ann_frame_idx]) image = Image.open(img_path) # Step 1: initialize the config -config = Config(API_TOKEN_FOR_GD1_5) +config = Config(API_TOKEN_FOR_DINOX) # Step 2: initialize the client client = Client(config) @@ -104,7 +105,9 @@ image_url = client.upload_file(img_path) task = DinoxTask( image_url=image_url, - prompts=[TextPrompt(text=TEXT_PROMPT)] + prompts=[TextPrompt(text=TEXT_PROMPT)], + bbox_threshold=0.25, + targets=[DetectionTarget.BBox], ) client.run_task(task)