feat: add keep_alive header to API requests for improved connection management #1

closed
opened by lenooby09.tech, targeting branch `previous`
+4 -4
ollama_adapter.py
···
data.update(filtered)
+
data['keep_alive'] = 180
body = json.dumps(data).encode('utf-8')
req = Request(self._url(generic_endpoint), data=body, headers={'Content-Type': 'application/json'})
try:
with urlopen(req, timeout=self.timeout) as resp:
raw = resp.read().decode('utf-8')
···
logger.debug('Ollama embeddings request to %s; body=%s', self._url(ep), json.dumps(data)[:2000])
except Exception:
logger.debug('Ollama embeddings request prepared')
+
data['keep_alive'] = 180
+
body = json.dumps(data).encode('utf-8')
req = Request(self._url(ep), data=body, headers={'Content-Type': 'application/json'})
try:
with urlopen(req, timeout=self.timeout) as resp:
raw = resp.read().decode('utf-8')
···
# Try /api/tags (some Ollama builds) then /api/list
for path in ['/api/tags', '/api/list']:
try:
req = Request(self._url(path))
with urlopen(req, timeout=5) as resp:
raw = resp.read().decode('utf-8')
try:
···
)
# quick health check (non-raising)
try:
req = Request(client._url('/api/list'))
with urlopen(req, timeout=2):
# ignore content; just successful connect
pass
+2 -2
ollama_introspection.py
···
def _request_json(path: str, base_url: Optional[str] = None, timeout: int = 10) -> Dict[str, Any]:
full = urljoin(_base_url(base_url) + '/', path.lstrip('/'))
req = Request(full)
try:
with urlopen(req, timeout=timeout) as resp:
raw = resp.read().decode('utf-8')
···
path = '/api/show'
full = urljoin(_base_url(base_url) + '/', path.lstrip('/'))
body = json.dumps(payload).encode('utf-8')
req = Request(full, data=body, headers={'Content-Type': 'application/json'})
try:
with urlopen(req, timeout=10) as resp:
raw = resp.read().decode('utf-8')
+2 -2
scripts/ollama_demo.py
···
cfg = get_ollama_config()
url = cfg.get('url').rstrip('/') + '/api/generate'
-
body = json.dumps({'model': client.chat_model, 'prompt': prompt}).encode('utf-8')
+
body = json.dumps({'model': client.chat_model, 'prompt': prompt, 'keep_alive': 180}).encode('utf-8')
req = Request(url, data=body, headers={'Content-Type': 'application/json'})
with urlopen(req, timeout=5) as resp:
raw = resp.read().decode('utf-8')
print('\n=== Raw /api/generate response ===')
···
cfg = get_ollama_config()
url = cfg.get('url').rstrip('/') + f'/api/models/{client.embedding_model}/embed'
-
body = json.dumps({'input': texts}).encode('utf-8')
+
body = json.dumps({'input': texts, 'keep_alive': 180}).encode('utf-8')
req = Request(url, data=body, headers={'Content-Type': 'application/json'})
with urlopen(req, timeout=5) as resp:
raw = resp.read().decode('utf-8')
print('\n=== Raw model-specific embed response ===')