diff --git a/ollama_adapter.py b/ollama_adapter.py
index 03319f0..ad5792d 100644
--- a/ollama_adapter.py
+++ b/ollama_adapter.py
@@ -114,6 +114,9 @@ class OllamaClient:
         data.update(filtered)
+        # keep_alive is an Ollama *request-body* option (number of seconds or a
+        # duration string like "5m"), not an HTTP header — headers are ignored.
+        data.setdefault('keep_alive', 180)
         body = json.dumps(data).encode('utf-8')
         req = Request(self._url(generic_endpoint), data=body, headers={'Content-Type': 'application/json'})
         try:
             with urlopen(req, timeout=self.timeout) as resp:
                 raw = resp.read().decode('utf-8')
@@ -296,7 +296,10 @@ class OllamaClient:
             logger.debug('Ollama embeddings request to %s; body=%s', self._url(ep), json.dumps(data)[:2000])
         except Exception:
             logger.debug('Ollama embeddings request prepared')
+        # Inject keep_alive into the JSON payload (not a header) and re-serialize.
+        data.setdefault('keep_alive', 180)
+        body = json.dumps(data).encode('utf-8')
         req = Request(self._url(ep), data=body, headers={'Content-Type': 'application/json'})
         try:
             with urlopen(req, timeout=self.timeout) as resp:
                 raw = resp.read().decode('utf-8')
diff --git a/scripts/ollama_demo.py b/scripts/ollama_demo.py
index 6b998c9..23e7856 100644
--- a/scripts/ollama_demo.py
+++ b/scripts/ollama_demo.py
@@ -43,7 +43,7 @@ def main():
     cfg = get_ollama_config()
     url = cfg.get('url').rstrip('/') + '/api/generate'
-    body = json.dumps({'model': client.chat_model, 'prompt': prompt}).encode('utf-8')
+    body = json.dumps({'model': client.chat_model, 'prompt': prompt, 'keep_alive': 180}).encode('utf-8')
     req = Request(url, data=body, headers={'Content-Type': 'application/json'})
     with urlopen(req, timeout=5) as resp:
         raw = resp.read().decode('utf-8')
     print('\n=== Raw /api/generate response ===')
@@ -70,7 +70,7 @@ def main():
     cfg = get_ollama_config()
     url = cfg.get('url').rstrip('/') + f'/api/models/{client.embedding_model}/embed'
-    body = json.dumps({'input': texts}).encode('utf-8')
+    body = json.dumps({'input': texts, 'keep_alive': 180}).encode('utf-8')
     req = Request(url, data=body, headers={'Content-Type': 'application/json'})
     with urlopen(req, timeout=5) as resp:
         raw = resp.read().decode('utf-8')
     print('\n=== Raw model-specific embed response ===')