Python
Add one background request that mirrors your production traffic to Squire.
Put this snippet after your existing model call; if the Squire request fails, your app still works.
import json
import os
from threading import Thread
from urllib.request import Request, urlopen
# Read the ingest key once at import time. Use .get() so a missing env var
# merely degrades Squire reporting (the POST will be rejected and silently
# dropped) instead of crashing the whole app at startup with a KeyError.
SQUIRE_INGEST_KEY = os.environ.get("SQUIRE_INGEST_KEY", "")
def send_to_squire(payload):
    """Fire-and-forget POST of *payload* to the Squire compare endpoint.

    Args:
        payload: A JSON-serializable dict (the production request plus the
            reference output) to send to ``/v1/compare``.

    Best-effort by design: every network/HTTP error is swallowed so the
    caller's request path is never affected. Intended to run on a background
    thread; the short timeout keeps that thread bounded.
    """
    request = Request(
        "https://squire.run/v1/compare",
        data=json.dumps(payload).encode("utf-8"),
        headers={
            "Authorization": f"Bearer {SQUIRE_INGEST_KEY}",
            "Content-Type": "application/json",
        },
        method="POST",
    )
    try:
        # Context manager guarantees the response object is closed even if
        # closing itself raises, unlike a bare .close() chained on urlopen().
        with urlopen(request, timeout=2):
            pass
    except Exception:
        # Deliberate best-effort: Squire being unreachable must not break
        # the application.
        pass
# --- Production call (unchanged) + Squire mirroring -------------------------
# NOTE(review): `client` and `messages` must already be defined by the
# surrounding application code — presumably an OpenAI-compatible client and a
# prepared chat-message list; confirm against the caller.
PRODUCTION_MODEL = "gpt-4.1" # use your existing production model
production_request = {
"model": PRODUCTION_MODEL,
"messages": messages,
# If your production call uses response_format, tools, temperature,
# max_tokens, or other OpenAI-compatible settings, keep them here.
}
response = client.chat.completions.create(**production_request)
# Ship the same request plus the production answer to Squire on a daemon
# thread: the upload happens off the request path, and daemon=True means a
# hung or slow upload cannot keep the process alive at shutdown.
# NOTE(review): `response.choices[0].message.content` can be None for
# tool-call-only responses — confirm this matches your production settings.
Thread(
target=send_to_squire,
args=({
**production_request,
"reference_output": response.choices[0].message.content,
},),
daemon=True,
).start()