add tracking demo and support video dump

2024-08-02 17:06:32 +08:00
parent bf450b6b41
commit 1d018ceb55
2 changed files with 46 additions and 15 deletions
--- a/grounded_sam2_tracking_demo.py
+++ b/grounded_sam2_tracking_demo.py
@@ -8,6 +8,7 @@ from sam2.build_sam import build_sam2_video_predictor, build_sam2
 from sam2.sam2_image_predictor import SAM2ImagePredictor
 from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection 
 from track_utils import sample_points_from_masks
 from video_utils import create_video_from_images
 """
@@ -152,21 +153,16 @@ for frame_idx, segments in video_segments.items():
        mask=masks, # (n, h, w)
        class_id=np.array(object_ids, dtype=np.int32),
    )
    box_annotator = sv.BoxAnnotator()
    annotated_frame = box_annotator.annotate(scene=img.copy(), detections=detections, labels=[ID_TO_OBJECTS[i] for i in object_ids])
    mask_annotator = sv.MaskAnnotator()
-    annotated_frame = mask_annotator.annotate(scene=img.copy(), detections=detections)
+    annotated_frame = mask_annotator.annotate(scene=annotated_frame, detections=detections)
-    cv2.imwrite(f"annotated_frame_{frame_idx}.jpg", annotated_frame)
+    cv2.imwrite(os.path.join(save_dir, f"annotated_frame_{frame_idx:05d}.jpg"), annotated_frame)
-# import cv2
+"""
-# import supervision as sv
+Step 6: Convert the annotated frames to video
-# # visualize each mask
+"""
-# for out_frame_idx, masks in video_segments.items():
+
-#     img = cv2.imread(os.path.join(video_dir, frame_names[out_frame_idx]))
+output_video_path = "./children_tracking_demo_video.mp4"
-#     detections = sv.Detections(
+create_video_from_images(save_dir, output_video_path)
 #         xyxy=np.array([[0, 0, 100, 100]]),  # (n, 4)
 #         mask=masks[1]
 #     )
 #     mask_annotator = sv.MaskAnnotator()
 #     annotated_frame = mask_annotator.annotate(scene=img.copy(), detections=detections)
 #     cv2.imwrite(f"annotated_frame_{out_frame_idx}.jpg", annotated_frame)
 #     import pdb; pdb.set_trace()
--- a/video_utils.py
+++ b/video_utils.py
@@ -0,0 +1,35 @@
 import cv2
 import os
 from tqdm import tqdm
 def create_video_from_images(image_folder, output_video_path, frame_rate=30):
    # 定义允许的图像后缀
    valid_extensions = [".jpg", ".jpeg", ".JPG", ".JPEG"]
    # 获取图像文件列表
    image_files = [f for f in os.listdir(image_folder) 
                   if os.path.splitext(f)[1] in valid_extensions]
    image_files.sort()  # 排序，确保按正确的顺序读取图像
    print(image_files)
    if not image_files:
        raise ValueError("No valid image files found in the specified folder.")
    # 读取第一张图像以获取视频尺寸
    first_image_path = os.path.join(image_folder, image_files[0])
    first_image = cv2.imread(first_image_path)
    height, width, _ = first_image.shape
    # 创建视频写入对象
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # 可以选择其他编码方式，如 'XVID'
    video_writer = cv2.VideoWriter(output_video_path, fourcc, frame_rate, (width, height))
    # 逐帧写入视频
    for image_file in tqdm(image_files):
        image_path = os.path.join(image_folder, image_file)
        image = cv2.imread(image_path)
        video_writer.write(image)
    # 释放资源
    video_writer.release()
    print(f"Video saved at {output_video_path}")