feat: Update agent interface to support optional auth handler name and improve observability token caching

Yogeshp-MSFT · Yogeshp-MSFT · commit 844487a4333b · 2026-05-08T17:44:25.000+05:30
diff --git a/python/perplexity/sample-agent/agent.py b/python/perplexity/sample-agent/agent.py
@@ -107,7 +107,7 @@ async def invoke_agent(
         self,
         message: str,
         auth: Authorization,
-        auth_handler_name: str,
+        auth_handler_name: str | None,
         context: TurnContext,
     ) -> str:
         # Log the user identity
@@ -193,7 +193,7 @@ async def _invoke_agent_with_inference_scope(
         self,
         message: str,
         auth: Authorization,
-        auth_handler_name: str,
+        auth_handler_name: str | None,
         context: TurnContext,
     ) -> str:
         """invoke_agent wrapped in an InferenceScope for observability."""
@@ -234,7 +234,7 @@ async def invoke_agent_with_scope(
         self,
         message: str,
         auth: Authorization,
-        auth_handler_name: str,
+        auth_handler_name: str | None,
         context: TurnContext,
     ) -> str:
         # Extract identity from the activity recipient (populated by the platform).
diff --git a/python/perplexity/sample-agent/agent_interface.py b/python/perplexity/sample-agent/agent_interface.py
@@ -6,6 +6,7 @@
 """
 
 from abc import ABC, abstractmethod
+from typing import Optional
 from microsoft_agents.hosting.core import Authorization, TurnContext
 
 
@@ -18,14 +19,14 @@ class AgentInterface(ABC):
     """
     @abstractmethod
     async def invoke_agent(
-        self, message: str, auth: Authorization, auth_handler_name: str, context: TurnContext
+        self, message: str, auth: Authorization, auth_handler_name: Optional[str], context: TurnContext
     ) -> str:
         """Process a user message and return a response."""
         pass
 
     @abstractmethod
     async def invoke_agent_with_scope(
-        self, message: str, auth: Authorization, auth_handler_name: str, context: TurnContext
+        self, message: str, auth: Authorization, auth_handler_name: Optional[str], context: TurnContext
     ) -> str:
         """Process a user message within an observability scope and return a response."""
         pass
diff --git a/python/perplexity/sample-agent/hosting.py b/python/perplexity/sample-agent/hosting.py
@@ -144,16 +144,21 @@ async def _typing_loop():
                 if self.auth_handler_name:
                     try:
                         recipient = context.activity.recipient
-                        tenant_id = getattr(recipient, "tenant_id", None) or ""
-                        agent_id = getattr(recipient, "agentic_app_id", None) or ""
+                        tenant_id = (getattr(recipient, "tenant_id", None) or "").strip()
+                        agent_id = (getattr(recipient, "agentic_app_id", None) or "").strip()
                         obs_token = await self.auth.exchange_token(
                             context,
                             scopes=get_observability_authentication_scope(),
                             auth_handler_id=self.auth_handler_name,
                         )
                         if obs_token and obs_token.token:
-                            cache_agentic_token(tenant_id, agent_id, obs_token.token)
-                            logger.info("Agentic token cached for observability exporter")
+                            if tenant_id and agent_id:
+                                cache_agentic_token(tenant_id, agent_id, obs_token.token)
+                                logger.info("Agentic token cached for observability exporter")
+                            else:
+                                logger.info(
+                                    "Skipping observability token cache because tenant_id or agent_id is missing"
+                                )
                     except Exception as token_err:
                         logger.warning("Failed to exchange/cache observability token: %s", token_err)
 
diff --git a/python/perplexity/sample-agent/mcp_tool_registration_service.py b/python/perplexity/sample-agent/mcp_tool_registration_service.py
@@ -290,6 +290,10 @@ async def _connect_server(server_config):
                     server_url,
                     exc,
                 )
+                try:
+                    await session.close()
+                except Exception:
+                    pass
                 return None
 
         results = await _asyncio.gather(
@@ -395,13 +399,17 @@ async def execute_tool(name: str, arguments: dict) -> str:
                 "Tool '%s' failed after %d attempts — clearing MCP cache",
                 name, _MCP_MAX_RETRIES + 1,
             )
-            svc._initialized = False
+            await svc._invalidate_cache()
             return f"Error executing tool '{name}': {last_error}"
 
         return execute_tool
 
-    async def close(self) -> None:
-        """Close all cached MCP sessions (call on server shutdown)."""
+    async def _invalidate_cache(self) -> None:
+        """Close existing MCP sessions and clear all cached state.
+
+        Called by ``close()`` and when tool retries are exhausted, so the next
+        turn reconnects from scratch instead of appending duplicates.
+        """
         for s in self._sessions:
             try:
                 await s.close()
@@ -411,3 +419,7 @@ async def close(self) -> None:
         self._tool_map.clear()
         self._openai_tools.clear()
         self._initialized = False
+
+    async def close(self) -> None:
+        """Close all cached MCP sessions (call on server shutdown)."""
+        await self._invalidate_cache()
diff --git a/python/perplexity/sample-agent/perplexity_client.py b/python/perplexity/sample-agent/perplexity_client.py
@@ -31,6 +31,99 @@
 # Timeout (seconds) for a single Perplexity API call.
 _PER_ROUND_TIMEOUT = 30
 
+# Tool-selection threshold: when more tools than this are available,
+# make a fast preliminary call to pick only the relevant ones.
+_TOOL_SELECTION_THRESHOLD = 20
+
+# Maximum number of tools the selector prompt asks the model to return (not enforced on the reply).
+_TOOL_SELECTION_MAX = 15
+
+# Timeout (seconds) for the tool-selection call.
+_TOOL_SELECTION_TIMEOUT = 15
+
+
+async def select_relevant_tools(
+    client: AsyncOpenAI,
+    model: str,
+    user_message: str,
+    all_tools: list[dict],
+) -> list[dict]:
+    """Use a fast LLM call to pick only the tools relevant to *user_message*.
+
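+    Returns a filtered subset of *all_tools*; the prompt asks for at most ``_TOOL_SELECTION_MAX`` tools, but that cap is not enforced on the reply.
+    On a timeout, an error, or an unparseable or empty selection the full list is returned so the main flow is never blocked.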
+    """
+    # Build a compact one-line-per-tool catalog for the selector prompt.
+    catalog_lines: list[str] = []
+    for idx, t in enumerate(all_tools):
+        name = t.get("name", "unknown")
+        desc = (t.get("description") or "")[:120]
+        catalog_lines.append(f"{idx}: {name} — {desc}")
+    catalog = "\n".join(catalog_lines)
+
+    selector_prompt = (
+        "Given the user's request, select ONLY the tools needed to fulfill it.\n"
+        "Return a JSON array of tool index numbers (integers). Include tools that "
+        "might be needed for follow-up steps (e.g., if creating a document and sharing "
+        "a link, include both create and share tools).\n"
+        f"Select at most {_TOOL_SELECTION_MAX} tools. Return ONLY a JSON array like "
+        "[0, 3, 7], no explanation.\n\n"
+        f'User request: "{user_message}"\n\n'
+        f"Available tools:\n{catalog}"
+    )
+
+    try:
+        resp = await asyncio.wait_for(
+            client.responses.create(
+                model=model,
+                instructions="You are a tool selector. Return ONLY a JSON array of integers.",
+                input=selector_prompt,
+                store=False,
+            ),
+            timeout=_TOOL_SELECTION_TIMEOUT,
+        )
+
+        raw_text = ""
+        for item in resp.output:
+            if item.type == "message":
+                for c in getattr(item, "content", []):
+                    if hasattr(c, "text") and c.text:
+                        raw_text += c.text
+        if not raw_text:
+            raw_text = str(resp.output_text or "")
+
+        # Strip markdown fences and extract the JSON array.
+        raw_text = raw_text.strip().strip("`").strip()
+        if raw_text.startswith("json"):
+            raw_text = raw_text[4:].strip()
+
+        match = re.search(r"\[[\d,\s]+\]", raw_text)
+        if not match:
+            logger.warning("Tool selector returned unparseable response — using all tools")
+            return all_tools
+
+        indices: list[int] = json.loads(match.group())
+        selected = [all_tools[i] for i in indices if 0 <= i < len(all_tools)]
+
+        if not selected:
+            logger.warning("Tool selector returned empty set — using all tools")
+            return all_tools
+
+        logger.info(
+            "Tool selector narrowed %d → %d tools: %s",
+            len(all_tools),
+            len(selected),
+            [t.get("name") for t in selected],
+        )
+        return selected
+
+    except asyncio.TimeoutError:
+        logger.warning("Tool selector timed out (%ds) — using all tools", _TOOL_SELECTION_TIMEOUT)
+        return all_tools
+    except Exception as exc:
+        logger.warning("Tool selector failed (%s) — using all tools", exc)
+        return all_tools
+
 
 class PerplexityClient:
     """Async client for Perplexity AI using the Agent API (Responses API)."""
@@ -66,6 +159,11 @@ async def invoke(
         """
         logger.info("Invoking Perplexity model=%s (tools=%d)", self.model, len(tools or []))
 
+        # When too many tools are registered, use a fast selector call to
+        # narrow down to just the relevant ones before the main API request.
+        if tools and len(tools) > _TOOL_SELECTION_THRESHOLD:
+            tools = await select_relevant_tools(self._client, self.model, user_message, tools)
+
         create_kwargs: dict[str, Any] = {
             "model": self.model,
             "input": user_message,
@@ -107,7 +205,14 @@ async def invoke(
                     if ctx:
                         create_kwargs["input"] = f"{user_message}\n\n{ctx}"
                     tools = None
-                    response = await self._client.responses.create(**create_kwargs)
+                    try:
+                        response = await asyncio.wait_for(
+                            self._client.responses.create(**create_kwargs),
+                            timeout=_PER_ROUND_TIMEOUT,
+                        )
+                    except asyncio.TimeoutError:
+                        logger.warning("Perplexity API fallback round %d timed out (%ds) — returning partial answer", _round + 1, _PER_ROUND_TIMEOUT)
+                        break
                 else:
                     raise
 
@@ -186,9 +291,9 @@ async def invoke(
                 arguments = self._enrich_arguments(fc.name, arguments, user_message, tools or [])
 
                 logger.info("Executing MCP tool: %s (round %d)", fc.name, _round + 1)
-                logger.info("Tool arguments: %s", json.dumps(arguments, indent=2, default=str))
+                logger.debug("Tool arguments: %s", json.dumps(arguments, indent=2, default=str))
                 result = await tool_executor(fc.name, arguments)
-                logger.info("Tool result (first 500 chars): %.500s", json.dumps(result, default=str) if not isinstance(result, str) else result)
+                logger.debug("Tool result (first 500 chars): %.500s", json.dumps(result, default=str) if not isinstance(result, str) else result)
 
                 # Track resource creation/finalization generically
                 tool_lower = fc.name.lower()