NVIDIA garak Tutorial: Build a Complete Defensive LLM Red-Teaming Workflow with Custom Probes and Detectors


# Banner marking the start of the analysis stage of the tutorial.
print("\n########## 5. ANALYSIS ##########")
# Numeric/tabular helpers used by the analysis below.
import numpy as np
import pandas as pd
def find_latest_report():
    """Return the path of the most recently modified non-empty garak report.

    Searches ``~/.local/share/garak/garak_runs``, ``~/.cache/garak`` and the
    current working directory (all recursively) for entries whose names end
    in ``report.jsonl``.

    Returns:
        The matching path with the newest modification time, or ``None`` when
        no non-empty regular file matches. On equal modification times the
        first match encountered wins.
    """
    search_roots = [
        os.path.expanduser("~/.local/share/garak/garak_runs"),
        os.path.expanduser("~/.cache/garak"),
        ".",
    ]
    newest_path = None
    newest_mtime = None
    for base in search_roots:
        pattern = os.path.join(base, "**", "*report.jsonl")
        for cand in glob.glob(pattern, recursive=True):
            # glob also yields directories and dangling symlinks; skip those,
            # and tolerate a file vanishing between the glob and the stat.
            try:
                if not os.path.isfile(cand):
                    continue
                info = os.stat(cand)
            except OSError:
                continue
            if info.st_size == 0:
                continue
            # Strict ">" keeps the earliest candidate on ties.
            if newest_mtime is None or info.st_mtime > newest_mtime:
                newest_path, newest_mtime = cand, info.st_mtime
    return newest_path
# Prefer a report path set earlier in the session; otherwise use the newest
# report that find_latest_report() can locate on disk.
report_path = report_path or find_latest_report()
print("Analysing:", report_path)
evaluations = None
if report_path is None:
    # Nothing to load. Without this guard the manual-parse fallback below
    # would fail inside open(None) with an uncaught TypeError.
    print("No garak report found; skipping analysis.")
else:
    try:
        # Preferred path: let garak's own Report class build the table.
        from garak.report import Report
        rep = Report(report_path).load().get_evaluations()
        evaluations = rep.evaluations.copy()
        print("\n--- Per-probe mean SAFETY score (garak.report.Report) ---")
        print(rep.scores.round(1).to_string())
    except Exception as e:
        # Deliberately broad: any failure of the garak helper (import error,
        # API change, parse error) drops to reading the JSONL file directly.
        print("garak.report.Report unavailable, falling back to manual parse:", e)
        rows = []
        try:
            # JSON text is UTF-8; do not depend on the locale default encoding.
            with open(report_path, encoding="utf-8") as f:
                for line in f:
                    try:
                        r = json.loads(line)
                    except json.JSONDecodeError:
                        continue  # skip blank or malformed lines
                    # Only JSON objects carry an entry_type; ignore anything else.
                    if isinstance(r, dict) and r.get("entry_type") == "eval":
                        rows.append(r)
        except OSError as read_err:
            print("Could not read report:", read_err)
        evaluations = pd.DataFrame(rows)
        if not evaluations.empty:
            # Safety score = percentage of evaluated attempts that passed.
            # Rows with total_evaluated == 0 get 0.0 and will therefore show
            # an asr_% of 100 below even though nothing was evaluated.
            evaluations["score"] = np.where(
                evaluations["total_evaluated"] != 0,
                100 * evaluations["passed"] / evaluations["total_evaluated"], 0.0)
if evaluations is not None and not evaluations.empty:
    # Attack success rate is the complement of the safety (pass) score.
    evaluations["asr_%"] = (100 - evaluations["score"]).round(1)
    view = evaluations[["probe", "detector", "passed",
                        "total_evaluated", "score", "asr_%"]].copy()
    view = view.rename(columns={"score": "safe_%"})
    view["safe_%"] = view["safe_%"].round(1)
    # Most vulnerable probe/detector pairs first.
    view = view.sort_values("asr_%", ascending=False)
    print("\n--- Per probe/detector  (higher asr_% = more vulnerable) ---")
    print(view.to_string(index=False))
    try:
        # Plotting is optional; any failure only skips the chart.
        import matplotlib.pyplot as plt
        labels = (view["probe"] + "\n" + view["detector"]).tolist()
        # Scale the figure height with the number of bars.
        plt.figure(figsize=(8, 0.55 * len(view) + 1.5))
        plt.barh(labels, view["asr_%"], color="#76b900")
        # Keep the sorted order top-to-bottom (barh draws bottom-up by default).
        plt.gca().invert_yaxis()
        plt.xlabel("Attack Success Rate (%)")
        plt.xlim(0, 100)
        plt.title("garak — vulnerability by probe/detector")
        plt.tight_layout()
        plt.show()
    except Exception as e:
        print("plot skipped:", e)



Source link

Leave a Reply

Your email address will not be published. Required fields are marked *