GLM-5.2 OpenAI-Compatible API: A Hands-On Guide to Reasoning Effort, Function Calling, and Long-Context Retrieval

GLM-5.2 OpenAI-Compatible API: A Hands-On Guide to Reasoning Effort, Function Calling, and Long-Context Retrieval


import sys, subprocess
# Install/upgrade the OpenAI SDK using the interpreter that is running this
# script. check=False means a non-zero pip exit status does not raise here.
subprocess.run([sys.executable, "-m", "pip", "install", "-q", "-U", "openai"], check=False)
import os, re, json, time, getpass
from openai import OpenAI
# Registry of OpenAI-compatible endpoints that serve GLM-5.2. Each entry holds:
#   base_url - root URL handed to the OpenAI client
#   model    - model identifier as that provider spells it
#   env      - name of the environment variable / secret holding the API key
PROVIDERS = {
    "zai":         {"base_url": "https://api.z.ai/api/paas/v4/",    "model": "glm-5.2",         "env": "ZAI_API_KEY"},
    "openrouter":  {"base_url": "https://openrouter.ai/api/v1",     "model": "z-ai/glm-5.2",    "env": "OPENROUTER_API_KEY"},
    "together":    {"base_url": "https://api.together.xyz/v1",      "model": "zai-org/GLM-5.2", "env": "TOGETHER_API_KEY"},
    "requesty":    {"base_url": "https://router.requesty.ai/v1",    "model": "zai/glm-5.2",     "env": "REQUESTY_API_KEY"},
    "huggingface": {"base_url": "https://router.huggingface.co/v1", "model": "zai-org/GLM-5.2", "env": "HF_TOKEN"},
}
# Change PROVIDER to any key of PROVIDERS to switch backends.
PROVIDER = "zai"
CFG = PROVIDERS[PROVIDER]
MODEL = CFG["model"]
def load_api_key(env_name):
    """Return the API key stored under ``env_name``.

    Sources are tried in order:
      1. Colab secrets via ``google.colab.userdata`` (skipped if the import
         or the lookup raises, or if the secret is empty),
      2. the ``env_name`` environment variable (if set and non-empty),
      3. an interactive ``getpass`` prompt.
    """
    try:
        from google.colab import userdata
        v = userdata.get(env_name)
        if v:
            return v
    except Exception:
        # Deliberate best-effort: any failure of the Colab path (module not
        # importable, secret lookup raising) falls through to the next source.
        pass
    env_value = os.environ.get(env_name)
    if env_value:
        return env_value
    return getpass.getpass(f"Enter your {env_name}: ")
# Shared OpenAI SDK client, pointed at the selected provider's base URL and
# authenticated with the key resolved by load_api_key.
shopper = OpenAI(
    api_key=load_api_key(CFG["env"]),
    base_url=CFG["base_url"],
)
# Input / output token prices. The names suggest "per million tokens";
# currency is presumably USD -- TODO confirm against the provider's pricing.
PRICE_IN_PER_M = 1.40
PRICE_OUT_PER_M = 4.40

# Running totals across calls: prompt tokens, completion tokens, call count.
_USAGE = dict.fromkeys(("in", "out", "calls"), 0)


def _track(utilization):
    """Fold one response's usage object into the module-level ``_USAGE`` totals.

    A falsy ``utilization`` is ignored entirely (the call is not counted).
    Missing or ``None`` token attributes count as zero.
    """
    if not utilization:
        return
    for bucket, attr in (("in", "prompt_tokens"), ("out", "completion_tokens")):
        _USAGE[bucket] += getattr(utilization, attr, 0) or 0
    _USAGE["calls"] += 1
def get_reasoning(obj):
    """Pull GLM's hidden reasoning trace from a message/delta (a provider-extra field).

    Looks for ``reasoning_content`` in three places, returning the first
    truthy hit for the first two:
      1. as a direct attribute of ``obj``,
      2. inside ``obj.model_extra`` (treated as a mapping when present),
      3. in the dict returned by ``obj.to_dict()``.

    Returns ``None`` when ``to_dict`` is unavailable or raises; otherwise the
    third lookup's result is returned as-is (which may itself be ``None``).
    """
    val = getattr(obj, "reasoning_content", None)
    if val:
        return val
    extra = getattr(obj, "model_extra", None) or {}
    if extra.get("reasoning_content"):
        return extra["reasoning_content"]
    try:
        return obj.to_dict().get("reasoning_content")
    except Exception:
        # Best-effort last resort: objects without a usable to_dict() simply
        # have no reasoning to report.
        return None
def chat(messages, effort=None, pondering=True, instruments=None, tool_choice="auto",
         stream=False, max_tokens=2048, temperature=1.0, tool_stream=False):
    """Send one chat-completion request for MODEL through the shared client.

    GLM-specific params go through ``extra_body`` so any OpenAI client works.

    Args:
        messages: chat messages, forwarded unchanged.
        effort: None | "high" | "max" -- GLM-5.2 thinking-effort level (max is
            the model default). Only sent when truthy and ``pondering`` is on.
        pondering: True -> deep thinking on; False -> off (fast, cheap,
            low-latency). Sent as ``thinking: {"type": "enabled"|"disabled"}``.
        instruments: optional tool definitions; when truthy they are sent as
            ``tools`` together with ``tool_choice``.
        tool_choice: forwarded only when ``instruments`` is truthy.
        stream: forwarded as ``stream``; when true, ``stream_options`` is set
            to ``{"include_usage": True}``.
        max_tokens: forwarded as ``max_tokens``.
        temperature: forwarded as ``temperature``.
        tool_stream: when true, adds ``tool_stream: True`` to ``extra_body``.

    Returns:
        Whatever ``shopper.chat.completions.create`` returns for these
        arguments.
    """
    extra = {"thinking": {"type": "enabled" if pondering else "disabled"}}
    if effort and pondering:
        # An effort level is only sent while thinking is enabled.
        extra["reasoning_effort"] = effort
    if tool_stream:
        extra["tool_stream"] = True
    kwargs = dict(model=MODEL, messages=messages, max_tokens=max_tokens,
                  temperature=temperature, stream=stream, extra_body=extra)
    if instruments:
        kwargs.update(tools=instruments, tool_choice=tool_choice)
    if stream:
        kwargs["stream_options"] = {"include_usage": True}
    return shopper.chat.completions.create(**kwargs)



Source link

Leave a Reply

Your email address will not be published. Required fields are marked *