Building a Semantic Search Engine and Open-Status Classifier over the ResearchMath-14k Dataset

from sentence_transformers import util


def search(query: str, k: int = 5) -> None:
    """Print the ``k`` entries most similar to ``query``.

    The query is encoded with the module-level ``model``, compared against
    the module-level embedding matrix ``emb`` via cosine similarity, and the
    top matches are looked up positionally in the module-level frame ``work``.
    ``model``, ``emb``, ``work`` and ``TEXT_COL`` are defined outside this
    excerpt.

    Args:
        query: Free-text query to search for.
        k: Number of top-ranked results to print.
    """
    q = model.encode([query], normalize_embeddings=True)
    # cos_sim returns one row per query; a single query was passed, so take row 0.
    sims = util.cos_sim(q, emb)[0].cpu().numpy()
    # argsort is ascending: reverse for best-first, then keep the first k.
    idx = sims.argsort()[::-1][:k]

    print(f'\n=== Query: "{query}" ===')
    for rank, i in enumerate(idx, 1):
        # Position i in `sims` is used as the positional row index into `work`.
        row = work.iloc[i]
        print(f"\n[{rank}] sim={sims[i]:.3f} | {row['taxonomy_level_1']} "
              f"| status={row['open_status']}")
        # Show at most 260 characters of the text, flattened onto one line.
        print(" ", row[TEXT_COL][:260].replace("\n", " "), "…")


search("rational points on hyperelliptic curves")
# NOTE(review): the query below is cut off in the excerpt this was taken from;
# only the visible part of the string is kept — restore the full text if known.
search("multiplicativity of maximal output p-norm of…")

Read More