From 6aeee347759c091e5718c4a03fe0032165564c50 Mon Sep 17 00:00:00 2001
From: Niels <niels.rogge1@gmail.com>
Date: Mon, 5 Aug 2024 09:37:53 +0200
Subject: [PATCH] Make huggingface_hub a soft dependency

---
 sam2/build_sam.py            |  8 +++++---
 sam2/sam2_image_predictor.py |  3 ++-
 sam2/sam2_video_predictor.py | 17 +++++++++++++++++
 3 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/sam2/build_sam.py b/sam2/build_sam.py
index 00f9dcf..9bb5279 100644
--- a/sam2/build_sam.py
+++ b/sam2/build_sam.py
@@ -11,8 +11,6 @@ from hydra import compose
 from hydra.utils import instantiate
 from omegaconf import OmegaConf
 
-from huggingface_hub import hf_hub_download
-
 
 def build_sam2(
     config_file,
@@ -80,6 +78,8 @@ def build_sam2_video_predictor(
 
 def build_sam2_hf(model_id, **kwargs):
 
+    from huggingface_hub import hf_hub_download
+
     model_id_to_filenames = {
         "facebook/sam2-hiera-tiny": ("sam2_hiera_t.yaml", "sam2_hiera_tiny.pt"),
         "facebook/sam2-hiera-small": ("sam2_hiera_s.yaml", "sam2_hiera_small.pt"),
@@ -87,12 +87,14 @@ def build_sam2_hf(model_id, **kwargs):
         "facebook/sam2-hiera-large": ("sam2_hiera_l.yaml", "sam2_hiera_large.pt"),
     }
     config_name, checkpoint_name = model_id_to_filenames[model_id]
-    # config_file = hf_hub_download(repo_id=model_id, filename=config_name)
     ckpt_path = hf_hub_download(repo_id=model_id, filename=checkpoint_name)
     return build_sam2_video_predictor(config_file=config_name, ckpt_path=ckpt_path, **kwargs)
 
 
 def build_sam2_video_predictor_hf(model_id, **kwargs):
+    # Imported lazily so huggingface_hub is only required when this helper is called.
+    from huggingface_hub import hf_hub_download
+    # NOTE(review): filenames come from the full model_id, unlike build_sam2_hf's lookup table; confirm.
     config_file = hf_hub_download(repo_id=model_id, filename=f"{model_id}.yaml")
     ckpt_path = hf_hub_download(repo_id=model_id, filename=f"{model_id}.pt")
     return build_sam2_video_predictor(config_file=config_file, ckpt_path=ckpt_path, **kwargs)
diff --git a/sam2/sam2_image_predictor.py b/sam2/sam2_image_predictor.py
index 5d2980c..9bee70d 100644
--- a/sam2/sam2_image_predictor.py
+++ b/sam2/sam2_image_predictor.py
@@ -13,7 +13,6 @@ import torch
 from PIL.Image import Image
 
 from sam2.modeling.sam2_base import SAM2Base
-from sam2.build_sam import build_sam2_hf
 from sam2.utils.transforms import SAM2Transforms
 
 
@@ -74,6 +73,8 @@ class SAM2ImagePredictor:
         Returns:
           (SAM2ImagePredictor): The loaded model.
         """
+        from sam2.build_sam import build_sam2_hf
+
         sam_model = build_sam2_hf(model_id, **kwargs)
         return cls(sam_model)
 
diff --git a/sam2/sam2_video_predictor.py b/sam2/sam2_video_predictor.py
index 0defcec..d687bc1 100644
--- a/sam2/sam2_video_predictor.py
+++ b/sam2/sam2_video_predictor.py
@@ -103,6 +103,23 @@ class SAM2VideoPredictor(SAM2Base):
         self._get_image_feature(inference_state, frame_idx=0, batch_size=1)
         return inference_state
 
+    @classmethod
+    def from_pretrained(cls, model_id: str, **kwargs) -> "SAM2VideoPredictor":
+        """
+        Load a pretrained video predictor from the Hugging Face model hub.
+
+        Arguments:
+          model_id (str): The Hugging Face repository ID.
+          **kwargs: Extra arguments forwarded to build_sam2_video_predictor_hf.
+
+        Returns:
+          (SAM2VideoPredictor): The loaded model.
+        """
+        from sam2.build_sam import build_sam2_video_predictor_hf
+
+        # The builder returns the predictor itself, so it must not be re-wrapped in cls(...).
+        return build_sam2_video_predictor_hf(model_id, **kwargs)
+
     def _obj_id_to_idx(self, inference_state, obj_id):
         """Map client-side object id to model-side object index."""
         obj_idx = inference_state["obj_id_to_idx"].get(obj_id, None)