import re


def normalize_ws(s: str) -> str:
    """Collapse every run of whitespace in *s* to a single space and strip the ends."""
    # Bug fix: the original pattern r"s+" matched runs of the letter "s";
    # the whitespace character class r"\s+" is what normalization needs.
    return re.sub(r"\s+", " ", s).strip()
# Tiny in-memory demo corpus: each entry is one source document with a
# stable id, a human-readable title, and the raw text to be chunked.
RAW_DOCS = [
    {
        "doc_id": "trulens_core",
        "title": "TruLens core idea",
        "text": "TruLens is used to track and evaluate LLM applications. It can log app runs, compute feedback scores, and provide a dashboard to compare versions and investigate traces and results.",
    },
    {
        "doc_id": "trulens_feedback",
        "title": "Feedback functions",
        "text": "TruLens feedback functions can score groundedness, context relevance, and answer relevance. They are configured by specifying which parts of an app record should be used as inputs.",
    },
    {
        "doc_id": "trulens_rag",
        "title": "RAG workflow",
        "text": "A typical RAG system retrieves relevant chunks from a vector database and then generates an answer using those chunks as context. The quality depends on retrieval, prompt design, and generation behavior.",
    },
    {
        "doc_id": "trulens_instrumentation",
        "title": "Instrumentation",
        "text": "Instrumentation adds tracing spans to your app functions (like retrieval and generation). This makes it possible to analyze which contexts were retrieved, latency, token usage, and connect feedback evaluations to specific steps.",
    },
    {
        "doc_id": "vectorstores",
        "title": "Vector stores and embeddings",
        "text": "Vector stores index embeddings for text chunks, enabling semantic search. OpenAI embedding models can be used to embed chunks and queries, and Chroma can store them locally in memory for a notebook demo.",
    },
    {
        "doc_id": "prompting",
        "title": "Prompting and citations",
        "text": "Prompting can encourage careful, citation-grounded answers. A stronger prompt can enforce: answer only from context, be explicit about uncertainty, and provide short citations that map to retrieved chunks.",
    },
]
from dataclasses import dataclass
from typing import Any, Dict


@dataclass
class Chunk:
    """One retrievable slice of a source document.

    Bug fix: the field was previously declared as ``textual content`` — a
    two-token name that is not a valid Python identifier. ``chunk_docs``
    constructs instances with the keyword ``text``, so the field is ``text``.
    """

    chunk_id: str  # unique id of the form "<doc_id>_c<chunk_index>"
    doc_id: str  # id of the document this chunk came from
    title: str  # title of the source document
    text: str  # the chunk's character slice of the document text
    meta: Dict[str, Any]  # auxiliary fields (doc_id, title, chunk_index)
from typing import List


def chunk_docs(docs, chunk_size=350, overlap=80) -> List[Chunk]:
    """Split each document's text into overlapping fixed-size character chunks.

    Bug fixes: restores the identifiers mangled in the original — ``Listing``
    (undefined) -> ``List``, ``whereas`` (not a keyword) -> ``while``, and the
    ``begin``/``finish``/``textual content`` locals that were indexed as
    ``text[start:end]`` and therefore raised NameError.

    Args:
        docs: iterable of dicts with "doc_id", "title", and "text" keys.
        chunk_size: maximum number of characters per chunk.
        overlap: number of characters shared between consecutive chunks.

    Returns:
        A list of Chunk objects with ids of the form "<doc_id>_c<index>".
    """
    chunks: List[Chunk] = []
    for d in docs:
        text = normalize_ws(d["text"])
        start = 0
        idx = 0
        while start < len(text):
            end = min(len(text), start + chunk_size)
            chunk_id = f'{d["doc_id"]}_c{idx}'
            chunks.append(
                Chunk(
                    chunk_id=chunk_id,
                    doc_id=d["doc_id"],
                    title=d["title"],
                    text=text[start:end],
                    meta={"doc_id": d["doc_id"], "title": d["title"], "chunk_index": idx},
                )
            )
            idx += 1
            # Step back by `overlap` so consecutive chunks share context;
            # clamp at 0 in case overlap exceeds the first window.
            start = end - overlap
            if start < 0:
                start = 0
            # Reaching the end of the text terminates the document's loop.
            if end == len(text):
                break
    return chunks
# Module-level chunk index, built once at import time from the demo corpus.
CHUNKS = chunk_docs(RAW_DOCS)
Monday, February 23
Trending
- Cosmic Energy Profile
- ‘Hamnet’, ‘One Battle After Another’, ‘Sinners’ vie for BAFTA glory
- When Will Ripple’s (XRP) Bull Run Resume? We Asked 4 AIs (And Their Answers Shocked Us)
- Mortal Kombat-Inspired Olympic Ice Skaters Unleash Fatality And Friendship
- 50 Short Term Goal Examples That Will Change Your Life
- Building Painter and Marble Finisher 2026 Job Advertisement Pakistan
- The Curator: Handy Amazon home gadgets that *really* work – National
- EU says it will accept no increase in US tariffs
- ‘India lack unity, which is why they lost so badly’
- VectifyAI Launches Mafin 2.5 and PageIndex: Achieving 98.7% Financial RAG Accuracy with a New Open-Source Vectorless Tree Indexing.

