import sys, subprocess
# Install/upgrade the OpenAI SDK before it is imported further down.
# check=False: a failed install (non-zero pip exit status) does not raise here.
subprocess.run([sys.executable, "-m", "pip", "install", "-q", "-U", "openai"], check=False)
import os, re, json, time, getpass
from openai import OpenAI
# Registry of endpoints that can serve the GLM model. Each entry holds the
# endpoint base URL ("base_url"), the provider-specific model identifier
# ("model"), and the name of the environment variable / secret that stores
# the API key ("env").
PROVIDERS = {
    "zai": {
        "base_url": "https://api.z.ai/api/paas/v4/",
        "model": "glm-5.2",
        "env": "ZAI_API_KEY",
    },
    "openrouter": {
        "base_url": "https://openrouter.ai/api/v1",
        "model": "z-ai/glm-5.2",
        "env": "OPENROUTER_API_KEY",
    },
    "together": {
        "base_url": "https://api.together.xyz/v1",
        "model": "zai-org/GLM-5.2",
        "env": "TOGETHER_API_KEY",
    },
    "requesty": {
        "base_url": "https://router.requesty.ai/v1",
        "model": "zai/glm-5.2",
        "env": "REQUESTY_API_KEY",
    },
    "huggingface": {
        "base_url": "https://router.huggingface.co/v1",
        "model": "zai-org/GLM-5.2",
        "env": "HF_TOKEN",
    },
}
PROVIDER = "zai"  # key into PROVIDERS selecting the active backend
CFG = PROVIDERS[PROVIDER]
MODEL = CFG["model"]
def load_api_key(env_name):
    """Return the API key stored under ``env_name``.

    Sources are tried in order: the Google Colab ``userdata`` secret store,
    the process environment, and finally an interactive hidden prompt.

    Args:
        env_name: Name of the secret / environment variable to look up.

    Returns:
        The first non-empty value found, or whatever the user types at the
        ``getpass`` prompt.
    """
    try:
        from google.colab import userdata
        secret = userdata.get(env_name)
        if secret:
            return secret
    except Exception:
        # Deliberately best-effort: outside Colab the import fails, and any
        # error while reading the secret just means "try the next source".
        pass
    from_env = os.environ.get(env_name)
    if from_env:
        return from_env
    return getpass.getpass(f"Enter your {env_name}: ")
# Shared OpenAI SDK client, pointed at the selected provider's endpoint and
# authenticated with that provider's key.
shopper = OpenAI(
    api_key=load_api_key(CFG["env"]),
    base_url=CFG["base_url"],
)
# Input / output prices; presumably USD per million tokens - TODO confirm.
PRICE_IN_PER_M = 1.40
PRICE_OUT_PER_M = 4.40
# Running totals maintained by _track(): prompt tokens, completion tokens,
# and the number of recorded calls.
_USAGE = {
    "in": 0,
    "out": 0,
    "calls": 0,
}
def _track(utilization):
    """Fold one response's token usage into the module-level ``_USAGE`` totals.

    ``utilization`` is the usage object of a response; a falsy value (for
    example ``None``) leaves the totals untouched. Missing or ``None`` token
    counts are treated as zero.
    """
    # NOTE(review): the source's indentation was lost; the call counter is
    # assumed to be incremented only when usage is present - confirm.
    if not utilization:
        return
    for bucket, attr in (("in", "prompt_tokens"), ("out", "completion_tokens")):
        _USAGE[bucket] += getattr(utilization, attr, 0) or 0
    _USAGE["calls"] += 1
def get_reasoning(obj):
    """Return GLM's reasoning trace from a message or delta, if present.

    ``reasoning_content`` is a provider-specific extra field, so it is looked
    up in three places in turn: as a plain attribute, in the object's
    ``model_extra`` mapping, and in the output of ``obj.to_dict()``.

    Args:
        obj: A message or streaming delta object.

    Returns:
        The reasoning text, or ``None`` when ``to_dict()`` is unavailable or
        fails (otherwise whatever the ``to_dict()`` lookup yields).
    """
    direct = getattr(obj, "reasoning_content", None)
    if direct:
        return direct
    extras = getattr(obj, "model_extra", None) or {}
    if extras.get("reasoning_content"):
        return extras["reasoning_content"]
    try:
        return obj.to_dict().get("reasoning_content")
    except Exception:
        # Best-effort: an object without a usable to_dict() has no trace.
        return None
def chat(messages, effort=None, pondering=True, instruments=None, tool_choice="auto",
         stream=False, max_tokens=2048, temperature=1.0, tool_stream=False):
    """Send a chat-completion request for ``MODEL`` through the shared client.

    GLM-specific options travel in ``extra_body`` so that a stock OpenAI
    client can be used unchanged.

    Args:
        messages: Chat messages, passed to the SDK as-is.
        effort: Optional reasoning-effort level, e.g. ``"high"`` or ``"max"``
            (the original note says max is the model default - confirm
            against the provider docs). Forwarded as ``reasoning_effort``
            only when ``pondering`` is true.
        pondering: Thinking-mode switch. True sends
            ``thinking = {"type": "enabled"}``; False sends ``"disabled"``.
        instruments: Optional tool definitions. When truthy they are sent as
            the request's ``tools`` together with ``tool_choice``.
        tool_choice: Tool-selection policy; only sent when tools are given.
        stream: Whether to stream the response. Streaming requests also set
            ``stream_options = {"include_usage": True}``.
        max_tokens: Forwarded as ``max_tokens``.
        temperature: Forwarded as ``temperature``.
        tool_stream: When true, sets ``tool_stream`` in ``extra_body``.

    Returns:
        The result of ``shopper.chat.completions.create`` for this request.
    """
    provider_opts = {"thinking": {"type": "enabled" if pondering else "disabled"}}
    if effort and pondering:
        provider_opts["reasoning_effort"] = effort
    if tool_stream:
        provider_opts["tool_stream"] = True

    request = dict(
        model=MODEL,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        stream=stream,
        extra_body=provider_opts,
    )
    if instruments:
        request.update(tools=instruments, tool_choice=tool_choice)
    if stream:
        request["stream_options"] = {"include_usage": True}
    return shopper.chat.completions.create(**request)