From f79f79a635b8f5a4fd06a8eea0c83688a42538c5 Mon Sep 17 00:00:00 2001
From: Isaac Robinson <isaac@roboflow.com>
Date: Thu, 14 Nov 2024 22:05:16 +0000
Subject: [PATCH 1/4] bumping owlv2 version and putting cache size in env

---
 inference/core/env.py           | 10 +++++++++-
 inference/models/owlv2/owlv2.py | 15 ++++++++++-----
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/inference/core/env.py b/inference/core/env.py
index 6eb060216..d9c02fd2f 100644
--- a/inference/core/env.py
+++ b/inference/core/env.py
@@ -74,11 +74,19 @@
 
 # Gaze version ID, default is "L2CS"
 GAZE_VERSION_ID = os.getenv("GAZE_VERSION_ID", "L2CS")
-OWLV2_VERSION_ID = os.getenv("OWLV2_VERSION_ID", "owlv2-base-patch16-ensemble")
 
 # Gaze model ID
 GAZE_MODEL_ID = f"gaze/{CLIP_VERSION_ID}"
 
+# OWLv2 version ID, default is "owlv2-large-patch14-ensemble"
+OWLV2_VERSION_ID = os.getenv("OWLV2_VERSION_ID", "owlv2-large-patch14-ensemble")
+
+# OWLv2 image cache size, default is 1000
+OWLV2_IMAGE_CACHE_SIZE = int(os.getenv("OWLV2_IMAGE_CACHE_SIZE", 1000))
+
+# OWLv2 model cache size, default is 100
+OWLV2_MODEL_CACHE_SIZE = int(os.getenv("OWLV2_MODEL_CACHE_SIZE", 10))
+
 # Maximum batch size for GAZE, default is 8
 GAZE_MAX_BATCH_SIZE = int(os.getenv("GAZE_MAX_BATCH_SIZE", 8))
 
diff --git a/inference/models/owlv2/owlv2.py b/inference/models/owlv2/owlv2.py
index 7e73c9407..21fc3ac94 100644
--- a/inference/models/owlv2/owlv2.py
+++ b/inference/models/owlv2/owlv2.py
@@ -16,7 +16,12 @@
     ObjectDetectionInferenceResponse,
     ObjectDetectionPrediction,
 )
-from inference.core.env import DEVICE, MAX_DETECTIONS
+from inference.core.env import (
+    DEVICE,
+    MAX_DETECTIONS,
+    OWLV2_IMAGE_CACHE_SIZE,
+    OWLV2_MODEL_CACHE_SIZE,
+)
 from inference.core.models.roboflow import (
     DEFAULT_COLOR_PALETTE,
     RoboflowCoreModel,
@@ -281,11 +286,11 @@ def __init__(self, *args, model_id="owlv2/owlv2-base-patch16-ensemble", **kwargs
 
     def reset_cache(self):
         # each entry should be on the order of 300*4KB, so 1000 is 400MB of CUDA memory
-        self.image_embed_cache = LimitedSizeDict(size_limit=1000)
+        self.image_embed_cache = LimitedSizeDict(size_limit=OWLV2_IMAGE_CACHE_SIZE)
         # each entry should be on the order of 10 bytes, so 1000 is 10KB
-        self.image_size_cache = LimitedSizeDict(size_limit=1000)
-        # entry size will vary depending on the number of samples, but 100 should be safe
-        self.class_embeddings_cache = LimitedSizeDict(size_limit=100)
+        self.image_size_cache = LimitedSizeDict(size_limit=OWLV2_IMAGE_CACHE_SIZE)
+        # entry size varies with the number of samples; see OWLV2_MODEL_CACHE_SIZE
+        self.class_embeddings_cache = LimitedSizeDict(size_limit=OWLV2_MODEL_CACHE_SIZE)
 
     def draw_predictions(
         self,

From 434e2f0a5e20e9d665622bed40237076496ec2be Mon Sep 17 00:00:00 2001
From: Isaac Robinson <isaac@roboflow.com>
Date: Thu, 14 Nov 2024 22:27:04 +0000
Subject: [PATCH 2/4] change testing default to large owlv2

---
 inference/models/owlv2/owlv2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/inference/models/owlv2/owlv2.py b/inference/models/owlv2/owlv2.py
index 21fc3ac94..c73c525a6 100644
--- a/inference/models/owlv2/owlv2.py
+++ b/inference/models/owlv2/owlv2.py
@@ -261,7 +261,7 @@ class OwlV2(RoboflowCoreModel):
     task_type = "object-detection"
     box_format = "xywh"
 
-    def __init__(self, *args, model_id="owlv2/owlv2-base-patch16-ensemble", **kwargs):
+    def __init__(self, *args, model_id="owlv2/owlv2-large-patch14-ensemble", **kwargs):
         super().__init__(*args, model_id=model_id, **kwargs)
         hf_id = os.path.join("google", self.version_id)
         processor = Owlv2Processor.from_pretrained(hf_id)

From 130ff0bdd6ef84a684ea1b8dd17d8fcf582ac552 Mon Sep 17 00:00:00 2001
From: Isaac Robinson <isaac@roboflow.com>
Date: Thu, 14 Nov 2024 22:39:18 +0000
Subject: [PATCH 3/4] adding notes on cache size defaults

---
 inference/core/env.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/inference/core/env.py b/inference/core/env.py
index d9c02fd2f..8194afa63 100644
--- a/inference/core/env.py
+++ b/inference/core/env.py
@@ -81,11 +81,11 @@
 # OWLv2 version ID, default is "owlv2-large-patch14-ensemble"
 OWLV2_VERSION_ID = os.getenv("OWLV2_VERSION_ID", "owlv2-large-patch14-ensemble")
 
-# OWLv2 image cache size, default is 1000
+# OWLv2 image cache size, default is 1000 since each image has max <MAX_DETECTIONS> boxes at ~4kb each
 OWLV2_IMAGE_CACHE_SIZE = int(os.getenv("OWLV2_IMAGE_CACHE_SIZE", 1000))
 
-# OWLv2 model cache size, default is 100
-OWLV2_MODEL_CACHE_SIZE = int(os.getenv("OWLV2_MODEL_CACHE_SIZE", 10))
+# OWLv2 model cache size, default is 100 as memory is num_prompts * ~4kb and num_prompts is rarely above 1000 (but could be much higher)
+OWLV2_MODEL_CACHE_SIZE = int(os.getenv("OWLV2_MODEL_CACHE_SIZE", 100))
 
 # Maximum batch size for GAZE, default is 8
 GAZE_MAX_BATCH_SIZE = int(os.getenv("GAZE_MAX_BATCH_SIZE", 8))

From 82571a646a479bbe36e2f1052666cdf99d694951 Mon Sep 17 00:00:00 2001
From: Isaac Robinson <isaac@roboflow.com>
Date: Thu, 14 Nov 2024 23:09:39 +0000
Subject: [PATCH 4/4] properly pipe version from env variable

---
 inference/models/owlv2/owlv2.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/inference/models/owlv2/owlv2.py b/inference/models/owlv2/owlv2.py
index c73c525a6..c02b8aecf 100644
--- a/inference/models/owlv2/owlv2.py
+++ b/inference/models/owlv2/owlv2.py
@@ -21,6 +21,7 @@
     MAX_DETECTIONS,
     OWLV2_IMAGE_CACHE_SIZE,
     OWLV2_MODEL_CACHE_SIZE,
+    OWLV2_VERSION_ID,
 )
 from inference.core.models.roboflow import (
     DEFAULT_COLOR_PALETTE,
@@ -261,7 +262,7 @@ class OwlV2(RoboflowCoreModel):
     task_type = "object-detection"
     box_format = "xywh"
 
-    def __init__(self, *args, model_id="owlv2/owlv2-large-patch14-ensemble", **kwargs):
+    def __init__(self, *args, model_id=f"owlv2/{OWLV2_VERSION_ID}", **kwargs):
         super().__init__(*args, model_id=model_id, **kwargs)
         hf_id = os.path.join("google", self.version_id)
         processor = Owlv2Processor.from_pretrained(hf_id)