|
# Timeout (seconds) for a single Perplexity API call.
_PER_ROUND_TIMEOUT = 30

# Tool-selection threshold: when strictly more tools than this are
# available, a fast preliminary call is made to pick only the relevant ones.
_TOOL_SELECTION_THRESHOLD = 20

# Maximum number of tools the selector is asked to return.
_TOOL_SELECTION_MAX = 15

# Timeout (seconds) for the tool-selection call; on expiry the caller
# falls back to the full tool list.
_TOOL_SELECTION_TIMEOUT = 15
| 43 | + |
| 44 | + |
async def select_relevant_tools(
    client: AsyncOpenAI,
    model: str,
    user_message: str,
    all_tools: list[dict],
) -> list[dict]:
    """Use a fast LLM call to pick only the tools relevant to *user_message*.

    The selector model is shown a compact, indexed catalog of *all_tools* and
    asked to answer with a JSON array of indices. The answer is parsed
    leniently (digits inside the first bracketed array), out-of-range and
    duplicate indices are dropped, and the result is truncated to
    ``_TOOL_SELECTION_MAX`` entries so the documented cap holds even when the
    model ignores the prompt.

    Args:
        client: Client whose ``responses.create`` coroutine performs the call.
        model: Model identifier passed through to the API.
        user_message: The user's request, embedded verbatim in the prompt.
        all_tools: Tool definitions; each is read via ``.get("name")`` and
            ``.get("description")``.

    Returns:
        A filtered subset (≤ ``_TOOL_SELECTION_MAX``) of *all_tools*, in the
        order the selector listed them. On any failure (timeout, API error,
        unparseable or empty selection) the full *all_tools* list is returned
        so the main flow is never blocked.
    """
    # Nothing to choose from: skip the API round-trip entirely.
    if not all_tools:
        return all_tools

    # Build a compact one-line-per-tool catalog for the selector prompt.
    # Descriptions are clipped to 120 characters to keep the prompt small.
    catalog_lines: list[str] = []
    for idx, tool in enumerate(all_tools):
        name = tool.get("name", "unknown")
        desc = (tool.get("description") or "")[:120]
        catalog_lines.append(f"{idx}: {name} — {desc}")
    catalog = "\n".join(catalog_lines)

    selector_prompt = (
        "Given the user's request, select ONLY the tools needed to fulfill it.\n"
        "Return a JSON array of tool index numbers (integers). Include tools that "
        "might be needed for follow-up steps (e.g., if creating a document and sharing "
        "a link, include both create and share tools).\n"
        f"Select at most {_TOOL_SELECTION_MAX} tools. Return ONLY a JSON array like "
        "[0, 3, 7], no explanation.\n\n"
        f'User request: "{user_message}"\n\n'
        f"Available tools:\n{catalog}"
    )

    # Deliberately broad best-effort boundary: tool selection is an
    # optimisation, so any failure degrades to "use every tool".
    try:
        resp = await asyncio.wait_for(
            client.responses.create(
                model=model,
                instructions="You are a tool selector. Return ONLY a JSON array of integers.",
                input=selector_prompt,
                store=False,
            ),
            timeout=_TOOL_SELECTION_TIMEOUT,
        )

        # Collect the text parts of every "message" output item.
        text_parts: list[str] = []
        for item in resp.output:
            if item.type != "message":
                continue
            for part in getattr(item, "content", None) or []:
                text = getattr(part, "text", None)
                if text:
                    text_parts.append(text)
        raw_text = "".join(text_parts)
        if not raw_text:
            raw_text = str(resp.output_text or "")

        # The search locates the array anywhere in the reply, so markdown
        # fences or a leading "json" tag need no separate stripping.
        match = re.search(r"\[[\d,\s]+\]", raw_text)
        if not match:
            logger.warning("Tool selector returned unparseable response — using all tools")
            return all_tools

        # Pull the digit runs out directly instead of json.loads so that
        # near-valid arrays (e.g. a trailing comma) are still usable.
        tool_count = len(all_tools)
        seen: set[int] = set()
        selected: list[dict] = []
        for token in re.findall(r"\d+", match.group()):
            index = int(token)
            if index >= tool_count or index in seen:
                continue
            seen.add(index)
            selected.append(all_tools[index])
            # Enforce the cap the prompt only requests.
            if len(selected) >= _TOOL_SELECTION_MAX:
                break

        if not selected:
            logger.warning("Tool selector returned empty set — using all tools")
            return all_tools

        logger.info(
            "Tool selector narrowed %d → %d tools: %s",
            tool_count,
            len(selected),
            [t.get("name") for t in selected],
        )
        return selected

    except asyncio.TimeoutError:
        logger.warning("Tool selector timed out (%ds) — using all tools", _TOOL_SELECTION_TIMEOUT)
        return all_tools
    except Exception as exc:
        logger.warning("Tool selector failed (%s) — using all tools", exc)
        return all_tools
| 126 | + |
34 | 127 |
|
35 | 128 | class PerplexityClient: |
36 | 129 | """Async client for Perplexity AI using the Agent API (Responses API).""" |
@@ -66,6 +159,11 @@ async def invoke( |
66 | 159 | """ |
67 | 160 | logger.info("Invoking Perplexity model=%s (tools=%d)", self.model, len(tools or [])) |
68 | 161 |
|
| 162 | + # When too many tools are registered, use a fast selector call to |
| 163 | + # narrow down to just the relevant ones before the main API request. |
| 164 | + if tools and len(tools) > _TOOL_SELECTION_THRESHOLD: |
| 165 | + tools = await select_relevant_tools(self._client, self.model, user_message, tools) |
| 166 | + |
69 | 167 | create_kwargs: dict[str, Any] = { |
70 | 168 | "model": self.model, |
71 | 169 | "input": user_message, |
@@ -107,7 +205,14 @@ async def invoke( |
107 | 205 | if ctx: |
108 | 206 | create_kwargs["input"] = f"{user_message}\n\n{ctx}" |
109 | 207 | tools = None |
110 | | - response = await self._client.responses.create(**create_kwargs) |
| 208 | + try: |
| 209 | + response = await asyncio.wait_for( |
| 210 | + self._client.responses.create(**create_kwargs), |
| 211 | + timeout=_PER_ROUND_TIMEOUT, |
| 212 | + ) |
| 213 | + except asyncio.TimeoutError: |
| 214 | + logger.warning("Perplexity API fallback round %d timed out (%ds) — returning partial answer", _round + 1, _PER_ROUND_TIMEOUT) |
| 215 | + break |
111 | 216 | else: |
112 | 217 | raise |
113 | 218 |
|
@@ -186,9 +291,9 @@ async def invoke( |
186 | 291 | arguments = self._enrich_arguments(fc.name, arguments, user_message, tools or []) |
187 | 292 |
|
188 | 293 | logger.info("Executing MCP tool: %s (round %d)", fc.name, _round + 1) |
189 | | - logger.info("Tool arguments: %s", json.dumps(arguments, indent=2, default=str)) |
| 294 | + logger.debug("Tool arguments: %s", json.dumps(arguments, indent=2, default=str)) |
190 | 295 | result = await tool_executor(fc.name, arguments) |
191 | | - logger.info("Tool result (first 500 chars): %.500s", json.dumps(result, default=str) if not isinstance(result, str) else result) |
| 296 | + logger.debug("Tool result (first 500 chars): %.500s", json.dumps(result, default=str) if not isinstance(result, str) else result) |
192 | 297 |
|
193 | 298 | # Track resource creation/finalization generically |
194 | 299 | tool_lower = fc.name.lower() |
|
0 commit comments