upgrade supervision to 0.22.0 and refine custom API usage

2024-08-06 01:59:27 +08:00
parent ed4c128a4e
commit 6915725120
7 changed files with 66 additions and 22 deletions
--- a/README.md
+++ b/README.md
@@ -41,12 +41,6 @@ Install `Grounding DINO`:
 pip install --no-build-isolation -e grounding_dino
 ```

-Downgrade the version of the `supervision` library to `0.6.0` to use its original API for visualization (we will update our code to be compatible with the latest version of `supervision` in the future release):
-
-```bash
-pip install supervision==0.6.0
-```
-
 Download the pretrained `SAM 2` checkpoints:

 ```bash
@@ -71,12 +65,16 @@ Note that `Grounding DINO` has already been supported in [Huggingface](https://h
 python grounded_sam2_hf_model_demo.py
 ```

+> [!NOTE]
+> 🚨 If you encounter network issues while using the `HuggingFace` model, you can work around them by pointing to a mirror endpoint, e.g. `export HF_ENDPOINT=https://hf-mirror.com`
+
 - Load local pretrained Grounding DINO checkpoint and inference with Grounding DINO original API (make sure you've already downloaded the pretrained checkpoint)

 ```bash
 python grounded_sam2_local_demo.py
 ```

+
 ### Grounded-SAM-2 Image Demo (with Grounding DINO 1.5 & 1.6)

 We've already released our most capable open-set detection model [Grounding DINO 1.5 & 1.6](https://github.com/IDEA-Research/Grounding-DINO-1.5-API), which can be combined with SAM 2 for stronger open-set detection and segmentation capability. You can apply the API token first and run Grounded-SAM-2 with Grounding DINO 1.5 as follows:
--- a/grounded_sam2_gd1.5_demo.py
+++ b/grounded_sam2_gd1.5_demo.py
@@ -18,7 +18,7 @@ from sam2.sam2_image_predictor import SAM2ImagePredictor
 Prompt Grounding DINO 1.5 with Text for Box Prompt Generation with Cloud API
 """
 # Step 1: initialize the config
-token = "Your API token here"
+token = "Your API token"
 config = Config(token)

 # Step 2: initialize the client
@@ -101,21 +101,31 @@ elif masks.ndim == 4:
 Visualization the Predict Results
 """

+class_ids = np.array(list(range(len(class_names))))
+
 labels = [
    f"{class_name} {confidence:.2f}"
    for class_name, confidence
    in zip(class_names, confidences)
 ]
+
+"""
+Visualize the image with the supervision API
+"""
 img = cv2.imread(img_path)
 detections = sv.Detections(
    xyxy=input_boxes,  # (n, 4)
-    mask=masks,  # (n, h, w)
+    mask=masks.astype(bool),  # (n, h, w)
+    class_id=class_ids
 )

 box_annotator = sv.BoxAnnotator()
-annotated_frame = box_annotator.annotate(scene=img.copy(), detections=detections, labels=labels)
+annotated_frame = box_annotator.annotate(scene=img.copy(), detections=detections)
+
+label_annotator = sv.LabelAnnotator()
+annotated_frame = label_annotator.annotate(scene=annotated_frame, detections=detections, labels=labels)
 cv2.imwrite("groundingdino_annotated_image.jpg", annotated_frame)

 mask_annotator = sv.MaskAnnotator()
 annotated_frame = mask_annotator.annotate(scene=annotated_frame, detections=detections)
-cv2.imwrite("grounded_sam2_annotated_image_with_mask.jpg", annotated_frame)
+cv2.imwrite("grounded_sam2_annotated_image_with_mask.jpg", annotated_frame)
--- a/grounded_sam2_hf_model_demo.py
+++ b/grounded_sam2_hf_model_demo.py
@@ -2,6 +2,8 @@ import cv2
 import torch
 import numpy as np
 import supervision as sv
+from supervision.draw.color import ColorPalette
+from supervision_utils import CUSTOM_COLOR_MAP
 from PIL import Image
 from sam2.build_sam import build_sam2
 from sam2.sam2_image_predictor import SAM2ImagePredictor
@@ -89,6 +91,7 @@ elif masks.ndim == 4:

 confidences = results[0]["scores"].cpu().numpy().tolist()
 class_names = results[0]["labels"]
+class_ids = np.array(list(range(len(class_names))))

 labels = [
    f"{class_name} {confidence:.2f}"
@@ -102,13 +105,21 @@ Visualize image with supervision useful API
 img = cv2.imread(img_path)
 detections = sv.Detections(
    xyxy=input_boxes,  # (n, 4)
-    mask=masks,  # (n, h, w)
+    mask=masks.astype(bool),  # (n, h, w)
+    class_id=class_ids
 )

-box_annotator = sv.BoxAnnotator()
-annotated_frame = box_annotator.annotate(scene=img.copy(), detections=detections, labels=labels)
+"""
+Note: to use the default color map instead,
+set color=ColorPalette.DEFAULT
+"""
+box_annotator = sv.BoxAnnotator(color=ColorPalette.from_hex(CUSTOM_COLOR_MAP))
+annotated_frame = box_annotator.annotate(scene=img.copy(), detections=detections)
+
+label_annotator = sv.LabelAnnotator(color=ColorPalette.from_hex(CUSTOM_COLOR_MAP))
+annotated_frame = label_annotator.annotate(scene=annotated_frame, detections=detections, labels=labels)
 cv2.imwrite("groundingdino_annotated_image.jpg", annotated_frame)

-mask_annotator = sv.MaskAnnotator()
+mask_annotator = sv.MaskAnnotator(color=ColorPalette.from_hex(CUSTOM_COLOR_MAP))
 annotated_frame = mask_annotator.annotate(scene=annotated_frame, detections=detections)
 cv2.imwrite("grounded_sam2_annotated_image_with_mask.jpg", annotated_frame)
--- a/grounded_sam2_local_demo.py
+++ b/grounded_sam2_local_demo.py
@@ -64,8 +64,6 @@ masks, scores, logits = sam2_predictor.predict(
    multimask_output=False,
 )

-import pdb; pdb.set_trace()
-
 """
 Post-process the output of the model to get the masks, scores, and logits for visualization
 """
@@ -81,6 +79,8 @@ elif masks.ndim == 4:
 confidences = confidences.numpy().tolist()
 class_names = labels

+class_ids = np.array(list(range(len(class_names))))
+
 labels = [
    f"{class_name} {confidence:.2f}"
    for class_name, confidence
@@ -93,11 +93,15 @@ Visualize image with supervision useful API
 img = cv2.imread(img_path)
 detections = sv.Detections(
    xyxy=input_boxes,  # (n, 4)
-    mask=masks,  # (n, h, w)
+    mask=masks.astype(bool),  # (n, h, w)
+    class_id=class_ids
 )

 box_annotator = sv.BoxAnnotator()
-annotated_frame = box_annotator.annotate(scene=img.copy(), detections=detections, labels=labels)
+annotated_frame = box_annotator.annotate(scene=img.copy(), detections=detections)
+
+label_annotator = sv.LabelAnnotator()
+annotated_frame = label_annotator.annotate(scene=annotated_frame, detections=detections, labels=labels)
 cv2.imwrite("groundingdino_annotated_image.jpg", annotated_frame)

 mask_annotator = sv.MaskAnnotator()
--- a/grounded_sam2_tracking_demo.py
+++ b/grounded_sam2_tracking_demo.py
@@ -154,7 +154,9 @@ for frame_idx, segments in video_segments.items():
        class_id=np.array(object_ids, dtype=np.int32),
    )
    box_annotator = sv.BoxAnnotator()
-    annotated_frame = box_annotator.annotate(scene=img.copy(), detections=detections, labels=[ID_TO_OBJECTS[i] for i in object_ids])
+    annotated_frame = box_annotator.annotate(scene=img.copy(), detections=detections)
+    label_annotator = sv.LabelAnnotator()
+    annotated_frame = label_annotator.annotate(annotated_frame, detections=detections, labels=[ID_TO_OBJECTS[i] for i in object_ids])
    mask_annotator = sv.MaskAnnotator()
    annotated_frame = mask_annotator.annotate(scene=annotated_frame, detections=detections)
    cv2.imwrite(os.path.join(save_dir, f"annotated_frame_{frame_idx:05d}.jpg"), annotated_frame)
--- a/grounded_sam2_tracking_demo_with_gd1.5.py
+++ b/grounded_sam2_tracking_demo_with_gd1.5.py
@@ -13,8 +13,7 @@ import numpy as np
 import supervision as sv
 from PIL import Image
 from sam2.build_sam import build_sam2_video_predictor, build_sam2
-from sam2.sam2_image_predictor import SAM2ImagePredictor
-from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection 
+from sam2.sam2_image_predictor import SAM2ImagePredictor 
 from track_utils import sample_points_from_masks
 from video_utils import create_video_from_images

@@ -177,7 +176,9 @@ for frame_idx, segments in video_segments.items():
        class_id=np.array(object_ids, dtype=np.int32),
    )
    box_annotator = sv.BoxAnnotator()
-    annotated_frame = box_annotator.annotate(scene=img.copy(), detections=detections, labels=[ID_TO_OBJECTS[i] for i in object_ids])
+    annotated_frame = box_annotator.annotate(scene=img.copy(), detections=detections)
+    label_annotator = sv.LabelAnnotator()
+    annotated_frame = label_annotator.annotate(annotated_frame, detections=detections, labels=[ID_TO_OBJECTS[i] for i in object_ids])
    mask_annotator = sv.MaskAnnotator()
    annotated_frame = mask_annotator.annotate(scene=annotated_frame, detections=detections)
    cv2.imwrite(os.path.join(save_dir, f"annotated_frame_{frame_idx:05d}.jpg"), annotated_frame)
--- a/supervision_utils.py
+++ b/supervision_utils.py
@@ -0,0 +1,18 @@
+CUSTOM_COLOR_MAP = [
+    "#e6194b",
+    "#3cb44b",
+    "#ffe119",
+    "#0082c8",
+    "#f58231",
+    "#911eb4",
+    "#46f0f0",
+    "#f032e6",
+    "#d2f53c",
+    "#fabebe",
+    "#008080",
+    "#e6beff",
+    "#aa6e28",
+    "#fffac8",
+    "#800000",
+    "#aaffc3",
+]