upgrade supervision to 0.22.0 and refine custom API usage

This commit is contained in:
rentainhe
2024-08-06 01:59:27 +08:00
parent ed4c128a4e
commit 6915725120
7 changed files with 66 additions and 22 deletions

View File

@@ -41,12 +41,6 @@ Install `Grounding DINO`:
pip install --no-build-isolation -e grounding_dino pip install --no-build-isolation -e grounding_dino
``` ```
Downgrade the `supervision` library to version `0.6.0` to use its original visualization API (we will update our code to be compatible with the latest version of `supervision` in a future release):
```bash
pip install supervision==0.6.0
```
Download the pretrained `SAM 2` checkpoints: Download the pretrained `SAM 2` checkpoints:
```bash ```bash
@@ -71,12 +65,16 @@ Note that `Grounding DINO` has already been supported in [Huggingface](https://h
python grounded_sam2_hf_model_demo.py python grounded_sam2_hf_model_demo.py
``` ```
> [!NOTE]
> 🚨 If you encounter network issues while using the `HuggingFace` model, you can work around them by pointing to a mirror endpoint: `export HF_ENDPOINT=https://hf-mirror.com`
- Load local pretrained Grounding DINO checkpoint and inference with Grounding DINO original API (make sure you've already downloaded the pretrained checkpoint) - Load local pretrained Grounding DINO checkpoint and inference with Grounding DINO original API (make sure you've already downloaded the pretrained checkpoint)
```bash ```bash
python grounded_sam2_local_demo.py python grounded_sam2_local_demo.py
``` ```
### Grounded-SAM-2 Image Demo (with Grounding DINO 1.5 & 1.6) ### Grounded-SAM-2 Image Demo (with Grounding DINO 1.5 & 1.6)
We've already released our most capable open-set detection model [Grounding DINO 1.5 & 1.6](https://github.com/IDEA-Research/Grounding-DINO-1.5-API), which can be combined with SAM 2 for stronger open-set detection and segmentation capability. You can apply for an API token first and then run Grounded-SAM-2 with Grounding DINO 1.5 as follows: We've already released our most capable open-set detection model [Grounding DINO 1.5 & 1.6](https://github.com/IDEA-Research/Grounding-DINO-1.5-API), which can be combined with SAM 2 for stronger open-set detection and segmentation capability. You can apply for an API token first and then run Grounded-SAM-2 with Grounding DINO 1.5 as follows:

View File

@@ -18,7 +18,7 @@ from sam2.sam2_image_predictor import SAM2ImagePredictor
Prompt Grounding DINO 1.5 with Text for Box Prompt Generation with Cloud API Prompt Grounding DINO 1.5 with Text for Box Prompt Generation with Cloud API
""" """
# Step 1: initialize the config # Step 1: initialize the config
token = "Your API token here" token = "Your API token"
config = Config(token) config = Config(token)
# Step 2: initialize the client # Step 2: initialize the client
@@ -101,21 +101,31 @@ elif masks.ndim == 4:
Visualization the Predict Results Visualization the Predict Results
""" """
class_ids = np.array(list(range(len(class_names))))
labels = [ labels = [
f"{class_name} {confidence:.2f}" f"{class_name} {confidence:.2f}"
for class_name, confidence for class_name, confidence
in zip(class_names, confidences) in zip(class_names, confidences)
] ]
"""
Visualize image with supervision useful API
"""
img = cv2.imread(img_path) img = cv2.imread(img_path)
detections = sv.Detections( detections = sv.Detections(
xyxy=input_boxes, # (n, 4) xyxy=input_boxes, # (n, 4)
mask=masks, # (n, h, w) mask=masks.astype(bool), # (n, h, w)
class_id=class_ids
) )
box_annotator = sv.BoxAnnotator() box_annotator = sv.BoxAnnotator()
annotated_frame = box_annotator.annotate(scene=img.copy(), detections=detections, labels=labels) annotated_frame = box_annotator.annotate(scene=img.copy(), detections=detections)
label_annotator = sv.LabelAnnotator()
annotated_frame = label_annotator.annotate(scene=annotated_frame, detections=detections, labels=labels)
cv2.imwrite("groundingdino_annotated_image.jpg", annotated_frame) cv2.imwrite("groundingdino_annotated_image.jpg", annotated_frame)
mask_annotator = sv.MaskAnnotator() mask_annotator = sv.MaskAnnotator()
annotated_frame = mask_annotator.annotate(scene=annotated_frame, detections=detections) annotated_frame = mask_annotator.annotate(scene=annotated_frame, detections=detections)
cv2.imwrite("grounded_sam2_annotated_image_with_mask.jpg", annotated_frame) cv2.imwrite("grounded_sam2_annotated_image_with_mask.jpg", annotated_frame)

View File

@@ -2,6 +2,8 @@ import cv2
import torch import torch
import numpy as np import numpy as np
import supervision as sv import supervision as sv
from supervision.draw.color import ColorPalette
from supervision_utils import CUSTOM_COLOR_MAP
from PIL import Image from PIL import Image
from sam2.build_sam import build_sam2 from sam2.build_sam import build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor from sam2.sam2_image_predictor import SAM2ImagePredictor
@@ -89,6 +91,7 @@ elif masks.ndim == 4:
confidences = results[0]["scores"].cpu().numpy().tolist() confidences = results[0]["scores"].cpu().numpy().tolist()
class_names = results[0]["labels"] class_names = results[0]["labels"]
class_ids = np.array(list(range(len(class_names))))
labels = [ labels = [
f"{class_name} {confidence:.2f}" f"{class_name} {confidence:.2f}"
@@ -102,13 +105,21 @@ Visualize image with supervision useful API
img = cv2.imread(img_path) img = cv2.imread(img_path)
detections = sv.Detections( detections = sv.Detections(
xyxy=input_boxes, # (n, 4) xyxy=input_boxes, # (n, 4)
mask=masks, # (n, h, w) mask=masks.astype(bool), # (n, h, w)
class_id=class_ids
) )
box_annotator = sv.BoxAnnotator() """
annotated_frame = box_annotator.annotate(scene=img.copy(), detections=detections, labels=labels) Note that if you want to use default color map,
you can set color=ColorPalette.DEFAULT
"""
box_annotator = sv.BoxAnnotator(color=ColorPalette.from_hex(CUSTOM_COLOR_MAP))
annotated_frame = box_annotator.annotate(scene=img.copy(), detections=detections)
label_annotator = sv.LabelAnnotator(color=ColorPalette.from_hex(CUSTOM_COLOR_MAP))
annotated_frame = label_annotator.annotate(scene=annotated_frame, detections=detections, labels=labels)
cv2.imwrite("groundingdino_annotated_image.jpg", annotated_frame) cv2.imwrite("groundingdino_annotated_image.jpg", annotated_frame)
mask_annotator = sv.MaskAnnotator() mask_annotator = sv.MaskAnnotator(color=ColorPalette.from_hex(CUSTOM_COLOR_MAP))
annotated_frame = mask_annotator.annotate(scene=annotated_frame, detections=detections) annotated_frame = mask_annotator.annotate(scene=annotated_frame, detections=detections)
cv2.imwrite("grounded_sam2_annotated_image_with_mask.jpg", annotated_frame) cv2.imwrite("grounded_sam2_annotated_image_with_mask.jpg", annotated_frame)

View File

@@ -64,8 +64,6 @@ masks, scores, logits = sam2_predictor.predict(
multimask_output=False, multimask_output=False,
) )
import pdb; pdb.set_trace()
""" """
Post-process the output of the model to get the masks, scores, and logits for visualization Post-process the output of the model to get the masks, scores, and logits for visualization
""" """
@@ -81,6 +79,8 @@ elif masks.ndim == 4:
confidences = confidences.numpy().tolist() confidences = confidences.numpy().tolist()
class_names = labels class_names = labels
class_ids = np.array(list(range(len(class_names))))
labels = [ labels = [
f"{class_name} {confidence:.2f}" f"{class_name} {confidence:.2f}"
for class_name, confidence for class_name, confidence
@@ -93,11 +93,15 @@ Visualize image with supervision useful API
img = cv2.imread(img_path) img = cv2.imread(img_path)
detections = sv.Detections( detections = sv.Detections(
xyxy=input_boxes, # (n, 4) xyxy=input_boxes, # (n, 4)
mask=masks, # (n, h, w) mask=masks.astype(bool), # (n, h, w)
class_id=class_ids
) )
box_annotator = sv.BoxAnnotator() box_annotator = sv.BoxAnnotator()
annotated_frame = box_annotator.annotate(scene=img.copy(), detections=detections, labels=labels) annotated_frame = box_annotator.annotate(scene=img.copy(), detections=detections)
label_annotator = sv.LabelAnnotator()
annotated_frame = label_annotator.annotate(scene=annotated_frame, detections=detections, labels=labels)
cv2.imwrite("groundingdino_annotated_image.jpg", annotated_frame) cv2.imwrite("groundingdino_annotated_image.jpg", annotated_frame)
mask_annotator = sv.MaskAnnotator() mask_annotator = sv.MaskAnnotator()

View File

@@ -154,7 +154,9 @@ for frame_idx, segments in video_segments.items():
class_id=np.array(object_ids, dtype=np.int32), class_id=np.array(object_ids, dtype=np.int32),
) )
box_annotator = sv.BoxAnnotator() box_annotator = sv.BoxAnnotator()
annotated_frame = box_annotator.annotate(scene=img.copy(), detections=detections, labels=[ID_TO_OBJECTS[i] for i in object_ids]) annotated_frame = box_annotator.annotate(scene=img.copy(), detections=detections)
label_annotator = sv.LabelAnnotator()
annotated_frame = label_annotator.annotate(annotated_frame, detections=detections, labels=[ID_TO_OBJECTS[i] for i in object_ids])
mask_annotator = sv.MaskAnnotator() mask_annotator = sv.MaskAnnotator()
annotated_frame = mask_annotator.annotate(scene=annotated_frame, detections=detections) annotated_frame = mask_annotator.annotate(scene=annotated_frame, detections=detections)
cv2.imwrite(os.path.join(save_dir, f"annotated_frame_{frame_idx:05d}.jpg"), annotated_frame) cv2.imwrite(os.path.join(save_dir, f"annotated_frame_{frame_idx:05d}.jpg"), annotated_frame)

View File

@@ -13,8 +13,7 @@ import numpy as np
import supervision as sv import supervision as sv
from PIL import Image from PIL import Image
from sam2.build_sam import build_sam2_video_predictor, build_sam2 from sam2.build_sam import build_sam2_video_predictor, build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor from sam2.sam2_image_predictor import SAM2ImagePredictor
from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
from track_utils import sample_points_from_masks from track_utils import sample_points_from_masks
from video_utils import create_video_from_images from video_utils import create_video_from_images
@@ -177,7 +176,9 @@ for frame_idx, segments in video_segments.items():
class_id=np.array(object_ids, dtype=np.int32), class_id=np.array(object_ids, dtype=np.int32),
) )
box_annotator = sv.BoxAnnotator() box_annotator = sv.BoxAnnotator()
annotated_frame = box_annotator.annotate(scene=img.copy(), detections=detections, labels=[ID_TO_OBJECTS[i] for i in object_ids]) annotated_frame = box_annotator.annotate(scene=img.copy(), detections=detections)
label_annotator = sv.LabelAnnotator()
annotated_frame = label_annotator.annotate(annotated_frame, detections=detections, labels=[ID_TO_OBJECTS[i] for i in object_ids])
mask_annotator = sv.MaskAnnotator() mask_annotator = sv.MaskAnnotator()
annotated_frame = mask_annotator.annotate(scene=annotated_frame, detections=detections) annotated_frame = mask_annotator.annotate(scene=annotated_frame, detections=detections)
cv2.imwrite(os.path.join(save_dir, f"annotated_frame_{frame_idx:05d}.jpg"), annotated_frame) cv2.imwrite(os.path.join(save_dir, f"annotated_frame_{frame_idx:05d}.jpg"), annotated_frame)

18
supervision_utils.py Normal file
View File

@@ -0,0 +1,18 @@
# Palette of 16 hex colour strings ("#rrggbb"), kept as a plain list so it can
# be handed to a hex-based palette constructor. Order matters: entries are
# looked up by position, so do not sort or deduplicate.
CUSTOM_COLOR_MAP = [
    "#e6194b", "#3cb44b", "#ffe119", "#0082c8",
    "#f58231", "#911eb4", "#46f0f0", "#f032e6",
    "#d2f53c", "#fabebe", "#008080", "#e6beff",
    "#aa6e28", "#fffac8", "#800000", "#aaffc3",
]