From f79f79a635b8f5a4fd06a8eea0c83688a42538c5 Mon Sep 17 00:00:00 2001 From: Isaac Robinson Date: Thu, 14 Nov 2024 22:05:16 +0000 Subject: [PATCH 1/4] bumping owlv2 version and putting cache size in env --- inference/core/env.py | 10 +++++++++- inference/models/owlv2/owlv2.py | 15 ++++++++++----- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/inference/core/env.py b/inference/core/env.py index 6eb060216..d9c02fd2f 100644 --- a/inference/core/env.py +++ b/inference/core/env.py @@ -74,11 +74,19 @@ # Gaze version ID, default is "L2CS" GAZE_VERSION_ID = os.getenv("GAZE_VERSION_ID", "L2CS") -OWLV2_VERSION_ID = os.getenv("OWLV2_VERSION_ID", "owlv2-base-patch16-ensemble") # Gaze model ID GAZE_MODEL_ID = f"gaze/{CLIP_VERSION_ID}" +# OWLv2 version ID, default is "owlv2-large-patch14-ensemble" +OWLV2_VERSION_ID = os.getenv("OWLV2_VERSION_ID", "owlv2-large-patch14-ensemble") + +# OWLv2 image cache size, default is 1000 +OWLV2_IMAGE_CACHE_SIZE = int(os.getenv("OWLV2_IMAGE_CACHE_SIZE", 1000)) + +# OWLv2 model cache size, default is 100 +OWLV2_MODEL_CACHE_SIZE = int(os.getenv("OWLV2_MODEL_CACHE_SIZE", 10)) + # Maximum batch size for GAZE, default is 8 GAZE_MAX_BATCH_SIZE = int(os.getenv("GAZE_MAX_BATCH_SIZE", 8)) diff --git a/inference/models/owlv2/owlv2.py b/inference/models/owlv2/owlv2.py index 7e73c9407..21fc3ac94 100644 --- a/inference/models/owlv2/owlv2.py +++ b/inference/models/owlv2/owlv2.py @@ -16,7 +16,12 @@ ObjectDetectionInferenceResponse, ObjectDetectionPrediction, ) -from inference.core.env import DEVICE, MAX_DETECTIONS +from inference.core.env import ( + DEVICE, + MAX_DETECTIONS, + OWLV2_IMAGE_CACHE_SIZE, + OWLV2_MODEL_CACHE_SIZE, +) from inference.core.models.roboflow import ( DEFAULT_COLOR_PALETTE, RoboflowCoreModel, @@ -281,11 +286,11 @@ def __init__(self, *args, model_id="owlv2/owlv2-base-patch16-ensemble", **kwargs def reset_cache(self): # each entry should be on the order of 300*4KB, so 1000 is 400MB of CUDA memory - self.image_embed_cache = LimitedSizeDict(size_limit=1000) + self.image_embed_cache = LimitedSizeDict(size_limit=OWLV2_IMAGE_CACHE_SIZE) # each entry should be on the order of 10 bytes, so 1000 is 10KB - self.image_size_cache = LimitedSizeDict(size_limit=1000) - # entry size will vary depending on the number of samples, but 100 should be safe - self.class_embeddings_cache = LimitedSizeDict(size_limit=100) + self.image_size_cache = LimitedSizeDict(size_limit=OWLV2_IMAGE_CACHE_SIZE) + # entry size will vary depending on the number of samples, but 10 should be safe + self.class_embeddings_cache = LimitedSizeDict(size_limit=OWLV2_MODEL_CACHE_SIZE) def draw_predictions( self, From 434e2f0a5e20e9d665622bed40237076496ec2be Mon Sep 17 00:00:00 2001 From: Isaac Robinson Date: Thu, 14 Nov 2024 22:27:04 +0000 Subject: [PATCH 2/4] change testing default to large owlv2 --- inference/models/owlv2/owlv2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inference/models/owlv2/owlv2.py b/inference/models/owlv2/owlv2.py index 21fc3ac94..c73c525a6 100644 --- a/inference/models/owlv2/owlv2.py +++ b/inference/models/owlv2/owlv2.py @@ -261,7 +261,7 @@ class OwlV2(RoboflowCoreModel): task_type = "object-detection" box_format = "xywh" - def __init__(self, *args, model_id="owlv2/owlv2-base-patch16-ensemble", **kwargs): + def __init__(self, *args, model_id="owlv2/owlv2-large-patch14-ensemble", **kwargs): super().__init__(*args, model_id=model_id, **kwargs) hf_id = os.path.join("google", self.version_id) processor = Owlv2Processor.from_pretrained(hf_id) From 130ff0bdd6ef84a684ea1b8dd17d8fcf582ac552 Mon Sep 17 00:00:00 2001 From: Isaac Robinson Date: Thu, 14 Nov 2024 22:39:18 +0000 Subject: [PATCH 3/4] adding notes on cache size defaults --- inference/core/env.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/inference/core/env.py b/inference/core/env.py index d9c02fd2f..8194afa63 100644 --- a/inference/core/env.py +++ b/inference/core/env.py @@ -81,11 +81,11 @@ # OWLv2 version ID, default is "owlv2-large-patch14-ensemble" OWLV2_VERSION_ID = os.getenv("OWLV2_VERSION_ID", "owlv2-large-patch14-ensemble") -# OWLv2 image cache size, default is 1000 +# OWLv2 image cache size, default is 1000 since each image has max boxes at ~4kb each OWLV2_IMAGE_CACHE_SIZE = int(os.getenv("OWLV2_IMAGE_CACHE_SIZE", 1000)) -# OWLv2 model cache size, default is 100 -OWLV2_MODEL_CACHE_SIZE = int(os.getenv("OWLV2_MODEL_CACHE_SIZE", 10)) +# OWLv2 model cache size, default is 100 as memory is num_prompts * ~4kb and num_prompts is rarely above 1000 (but could be much higher) +OWLV2_MODEL_CACHE_SIZE = int(os.getenv("OWLV2_MODEL_CACHE_SIZE", 100)) # Maximum batch size for GAZE, default is 8 GAZE_MAX_BATCH_SIZE = int(os.getenv("GAZE_MAX_BATCH_SIZE", 8)) From 82571a646a479bbe36e2f1052666cdf99d694951 Mon Sep 17 00:00:00 2001 From: Isaac Robinson Date: Thu, 14 Nov 2024 23:09:39 +0000 Subject: [PATCH 4/4] properly pipe version from env variable --- inference/models/owlv2/owlv2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/inference/models/owlv2/owlv2.py b/inference/models/owlv2/owlv2.py index c73c525a6..c02b8aecf 100644 --- a/inference/models/owlv2/owlv2.py +++ b/inference/models/owlv2/owlv2.py @@ -21,6 +21,7 @@ MAX_DETECTIONS, OWLV2_IMAGE_CACHE_SIZE, OWLV2_MODEL_CACHE_SIZE, + OWLV2_VERSION_ID, ) from inference.core.models.roboflow import ( DEFAULT_COLOR_PALETTE, @@ -261,7 +262,7 @@ class OwlV2(RoboflowCoreModel): task_type = "object-detection" box_format = "xywh" - def __init__(self, *args, model_id="owlv2/owlv2-large-patch14-ensemble", **kwargs): + def __init__(self, *args, model_id=f"owlv2/{OWLV2_VERSION_ID}", **kwargs): super().__init__(*args, model_id=model_id, **kwargs) hf_id = os.path.join("google", self.version_id) processor = Owlv2Processor.from_pretrained(hf_id)