Azure · JiaqiZhang-Dev · May 28, 2026 · May 29, 2026 · Jun 2, 2026 · Jun 2, 2026
@@ -28,7 +28,6 @@
 from agent_framework import SkillsProvider
 from agent_framework import ToolResultCompactionStrategy
 from agent_framework_foundry_hosting import ResponsesHostServer
-from opentelemetry import trace as otel_trace
 
 import config.app_config as app_config
 from config.app_config import get as cfg
@@ -39,8 +38,6 @@
 from tools.pipeline_tools import PipelineTools
 from skills.tenant_skills import create_tenant_skills
 from utils.azure_ai_foundry import (
-    FoundryAgentSpanEnricher,
-    SpanAttributeTruncator,
     get_agent_client,
     get_project_client,
 )
@@ -138,7 +135,7 @@ async def _init_mcp(factory):
 
     # Init Skills
     skills = create_tenant_skills()
-    skills_provider = SkillsProvider(skills=skills)
+    skills_provider = SkillsProvider(skills)
 
     reasoning_effort = cfg("AI_FOUNDRY_AGENT_REASONING_EFFORT")
     agent = Agent(
@@ -156,19 +153,6 @@ async def _init_mcp(factory):
     )
 
     server = ResponsesHostServer(agent)
-
-    # Init TracerProvider (auto-configured by ResponsesHostServer)
-    foundry_project_id = os.environ.get("AI_FOUNDRY_PROJECT_RESOURCE_ID", "")
-    provider = otel_trace.get_tracer_provider()
-    if hasattr(provider, "add_span_processor"):
-        # Truncate oversized span attributes so App Insights doesn't
-        # silently drop spans that exceed the 65 KB item limit.
-        provider.add_span_processor(SpanAttributeTruncator())
-        if foundry_project_id:
-            provider.add_span_processor(
-                FoundryAgentSpanEnricher(foundry_project_id, agent_name, agent_id)
-            )
-
     await server.run_async()
 
 

@@ -51,6 +51,7 @@ Route every message to exactly one of these paths:
 
 - Load the matching skill for domain questions to get guideline, tenant ID, and knowledge sources.
 - `typespec-authoring` may ONLY be loaded when `[tenant_context]` contains `original_tenant_id=azure_typespec_authoring`. Otherwise use `typespec`.
+- **Authoring tenant lock (overrides rules below)**: when `original_tenant_id=azure_typespec_authoring`, load ONLY `typespec-authoring` and search ONLY with its `tenant_id` — no other skills, no other tenants, even for multi-topic questions.
 - `[tenant_context]` is a **default**, not a constraint — load a more appropriate skill if the question doesn't match.
 - Multi-topic questions: load multiple skills and search with each `tenant_id` separately.
 

@@ -72,6 +72,9 @@ class TenantID(str, Enum):
 SRC_AZURE_SDK_DOCS_ENG = "azure-sdk-docs-eng"
 SRC_AZURE_SDK_INTERNAL_WIKI = "azure-sdk-internal-wiki"
 
+# -- SDK tools --
+SRC_AZURE_SDK_TOOLS_DOCS = "azure_sdk_tools_docs"
+
 # -- General Azure & review resources --
 SRC_STATIC_AZURE_DOCS = "static_azure_docs"
 SRC_STATIC_API_SPEC_VIEW_QA = "static_api_spec_view_qa"
@@ -248,6 +251,12 @@ def _register(*sources: KnowledgeSource) -> None:
         name=SRC_STATIC_API_SPEC_VIEW_QA,
         description="Historical Q&A for API specification review covering common validation errors and fixes.",
     ),
+    # -- SDK tools --
+    KnowledgeSource(
+        name=SRC_AZURE_SDK_TOOLS_DOCS,
+        description="Azure SDK tools documentation covering js-sdk-release-tools and related JavaScript SDK tooling.",
+        base_url="https://github.com/Azure/azure-sdk-tools/blob/main/",
+    ),
 )
 
 
@@ -449,6 +458,7 @@ def _sources(*names: str) -> list[KnowledgeSource]:
             SRC_AZURE_SDK_DOCS_ENG,
             SRC_TYPESPEC_AZURE_DOCS,
             SRC_AZURE_REST_API_SPECS_WIKI,
+            SRC_AZURE_SDK_TOOLS_DOCS,
         ),
         source_filter={
             SRC_AZURE_SDK_GUIDELINES: "search.ismatch('typescript', 'title')",

@@ -78,12 +78,6 @@ extends:
 
                     pip install -r requirements.txt --retries 5 --timeout 60
 
-                    export AI_FOUNDRY_PROJECT_RESOURCE_ID=$(az resource list \
-                      --name "$AI_FOUNDRY_ACCOUNT_NAME/$AI_FOUNDRY_PROJECT_NAME" \
-                      --resource-type Microsoft.CognitiveServices/accounts/projects \
-                      --query "[0].id" -o tsv)
-                    echo "Project resource ID: $AI_FOUNDRY_PROJECT_RESOURCE_ID"
-
                     python scripts/deploy_hosted_agent.py \
                       ${{ parameters.agentName }} \
                       --tag "$(Build.BuildId)" \
@@ -94,5 +88,4 @@ extends:
                   AI_FOUNDRY_PROJECT_NAME: $(AI_FOUNDRY_PROJECT_NAME)
                   ACR_NAME: $(ACR_NAME)
                   ACR_RESOURCE_GROUP: $(ACR_RESOURCE_GROUP)
-                  AI_FOUNDRY_PROJECT_RESOURCE_ID: $(AI_FOUNDRY_PROJECT_RESOURCE_ID)
                   SYSTEM_ACCESSTOKEN: $(System.AccessToken)
@@ -20,7 +20,7 @@
       ]
     },
     "serverBaseUrl": {
-      "value": "https://azuresdkqabot-server-c8czabhzhweadwgu.westus2-01.azurewebsites.net"
+      "value": "https://azuresdkqabot-server-agent-btg3c4hpa7faapen.westus2-01.azurewebsites.net"
     },
     "serverAudience": {
       "value": "899da762-d510-48f2-911a-db9ea0cc41fd"

@@ -1,4 +1,7 @@
 -r requirements.txt
 debugpy
-# Agent development CLI tool (preview)
-agent-dev-cli==0.0.1b260427
+# Agent development CLI tool (preview) — install separately with:
+#   pip install agent-dev-cli --pre --no-deps
+# It pins agent-framework-core<1.3.0, which conflicts with this project's agent-framework-core==1.7.0 pin.
+# Using --no-deps is safe because its only real dependency (starlette)
+# is already pulled in transitively.
@@ -1,19 +1,19 @@
-agent-framework-foundry-hosting==1.0.0a260429
-azure-ai-projects>=2.1.0
-azure-search-documents==11.7.0b2
+agent-framework-foundry-hosting==1.0.0a260528
+azure-ai-projects>=2.2.0
+azure-search-documents==12.0.0
 azure-appconfiguration>=1.8.0
-azure-cosmos>=4.9.0,<4.16.0b1
+azure-cosmos>=4.9.0
 azure-identity==1.26.0b2
 azure-keyvault-keys>=4.11.0
 azure-keyvault-secrets>=4.9.0
-azure-storage-blob==12.30.0b1
+azure-storage-blob==12.29.0
 et-xmlfile>=2.0.0
 openpyxl==3.1.5
 python-dotenv>=1.2.2
 pyyaml>=6.0.3
 httpx[http2]>=0.27.0,<1.0.dev1
-agent-framework-core==1.2.2
-agent-framework-foundry==1.2.2
+agent-framework-core==1.7.0
+agent-framework-foundry==1.7.0
 mcp>=1.0.0
 # Backend server
 fastapi

@@ -34,6 +34,7 @@
 from azure.ai.projects import AIProjectClient
 from azure.ai.projects.models import (
     AgentProtocol,
+    ContainerConfiguration,
     HostedAgentDefinition,
     ProtocolVersionRecord,
 )
@@ -204,9 +205,6 @@ def main() -> None:
     image = f"{registry}/{image_name}:{tag}"
     dockerfile = _PROJECT_DIR / "agents" / args.agent_name / "Dockerfile"
 
-    # Project resource ID is used as an env var inside the container for telemetry.
-    project_resource_id = os.environ.get("AI_FOUNDRY_PROJECT_RESOURCE_ID", "").strip()
-
     acr_name = registry.split(".")[0]
 
     # Check if the image tag already exists in ACR
@@ -269,19 +267,18 @@ def main() -> None:
             "AZURE_APPCONFIG_ENDPOINT": appconfig_endpoint,
             "ENABLE_INSTRUMENTATION": "true",
             "APP_VERSION": next_version,
-            "AI_FOUNDRY_PROJECT_RESOURCE_ID": project_resource_id,
         }
         agent = project.agents.create_version(
             agent_name=image_name,
             definition=HostedAgentDefinition(
-                container_protocol_versions=[
+                cpu="2",
+                memory="4Gi",
+                container_configuration=ContainerConfiguration(image=image),
+                protocol_versions=[
                     ProtocolVersionRecord(
                         protocol=AgentProtocol.RESPONSES, version="1.0.0"
                     )
                 ],
-                cpu="2",
-                memory="4Gi",
-                image=image,
                 environment_variables=env_vars,
             ),
             metadata={"enableVnextExperience": "true"},

@@ -54,6 +54,19 @@
 COMPACT_THRESHOLD = 100000
 """Token count at which conversation history is compacted."""
 
+# -- Bot identity constants -----------------------------------------------
+BOT_SENDER_ID = "azure-sdk-qa-bot"
+BOT_SENDER_NAME = "Azure SDK Q&A Bot"
+
+# -- Fallback error message when agent returns empty text -----------------
+EMPTY_RESPONSE_MESSAGE = (
+    "Sorry, something went wrong and I couldn't generate a response. "
+    "Please send your message again to retry."
+)
+
+# -- Stream event types ---------------------------------------------------
+STREAM_EVENT_RESPONSE_COMPLETED = "response.completed"
+
 _CITATION_RE = re.compile(r"[^\w\s]*cite[^\w\s]*turn\d+\S*")
 
 
@@ -122,32 +135,42 @@ async def chat(self, req: ChatRequest) -> ChatResponse:
         )
 
         # Process additional info (images, links, text) from the frontend.
-        image_items = await self._build_image_items(req.additional_infos or [])
-        conversation_items.extend(image_items)
+        additional_items = await self._build_additional_info_items(
+            req.additional_infos or []
+        )
+        conversation_items.extend(additional_items)
 
         agent_ref: dict = {
             "name": agent.name,
             "version": agent.version,
             "type": AgentReferenceType.agent_reference.value,
         }
 
-        # Streaming is broken for hosted agents — text is lost entirely.
-        # See https://github.com/Azure/azure-sdk-for-python/issues/45282
-        # and https://github.com/Azure/azure-sdk-for-python/issues/46015
-        raw_response = await openai_client.responses.with_raw_response.create(
+        stream = await openai_client.responses.create(
             input=conversation_items,
             conversation=agent_conversation_id,
             store=True,
-            stream=False,
+            stream=True,
             extra_body={
                 "agent_reference": agent_ref,
             },
         )
-        response: OpenAIResponse = raw_response.parse()
+
+        response: OpenAIResponse | None = None
+        async for event in stream:
+            logger.debug("Stream event: type=%s, content=%s", event.type, event)
+            if event.type == STREAM_EVENT_RESPONSE_COMPLETED:
+                response = event.response
+                break
+
+        if response is None:
+            raise RuntimeError("Agent stream ended without a response.completed event")
 
         # Extract AI Foundry trace ID from x-request-id header.
         # The header may contain duplicated values separated by comma.
-        x_request_id = raw_response.headers.get("x-request-id", "")
+        x_request_id = ""
+        if hasattr(stream, "response") and stream.response:
+            x_request_id = stream.response.headers.get("x-request-id", "")
         trace_id = x_request_id.split(",")[0].strip() if x_request_id else None
         logger.info(
             "Agent trace: trace_id=%s, response_id=%s, conversation=%s",
@@ -253,8 +276,8 @@ async def _save_bot_answer_to_conversation(
             id=f"bot-{response_id}",
             tenant_id=req.tenant_id.value,
             sender_role=Role.System,
-            sender_id="azure-sdk-qa-bot",
-            sender_name="Azure SDK Q&A Bot",
+            sender_id=BOT_SENDER_ID,
+            sender_name=BOT_SENDER_NAME,
             content=content,
             created_at=datetime.now(timezone.utc),
             conversation_id=req.conversation_id,
@@ -326,34 +349,59 @@ async def _resolve_conversation(
         return new_id, True
 
     @staticmethod
-    async def _build_image_items(
+    async def _build_additional_info_items(
         infos: list[AdditionalInfo],
     ) -> list[ResponseInputItemParam]:
-        """Convert image additional_infos into Responses API input items."""
+        """Convert additional_infos into Responses API input items.
+
+        Handles both text and image types:
+        - **Text**: injected as a user message so the LLM sees project context
+          (e.g. TypeSpec project state, intake analysis, .tsp code snippets).
+        - **Image**: fetched and converted to data-URI input_image items.
+        """
         items: list[ResponseInputItemParam] = []
         for info in infos:
-            if info.type != AdditionalInfoType.Image or not info.link:
-                continue
-            try:
-                data_uri = await get_image_data_uri(info.link)
-            except Exception:
-                logger.warning(
-                    "Failed to fetch Teams image: %s", info.link, exc_info=True
+            if info.type == AdditionalInfoType.Text and info.content:
+                content = info.content
+                max_chars = int(cfg("AOAI_CHAT_MAX_TOKENS", "100000"))
+                if len(content) > max_chars:
+                    logger.warning(
+                        "Text additional_info is large (%d chars, limit %d)",
+                        len(content),
+                        max_chars,
+                    )
+                items.append(
+                    cast(
+                        ResponseInputItemParam,
+                        ConversationItem(
+                            role=Role.User,
+                            content=info.content,
+                        ).model_dump(mode="json", exclude_none=True),
+                    )
+                )
+            elif info.type == AdditionalInfoType.Image and info.link:
+                try:
+                    data_uri = await get_image_data_uri(info.link)
+                except Exception:
+                    logger.warning(
+                        "Failed to fetch Teams image: %s",
+                        info.link,
+                        exc_info=True,
+                    )
+                    continue
+                items.append(
+                    {
+                        "type": "message",
+                        "role": "user",
+                        "content": [
+                            {
+                                "type": "input_image",
+                                "image_url": data_uri,
+                                "detail": "auto",
+                            },
+                        ],
+                    }
                 )
-                continue
-            items.append(
-                {
-                    "type": "message",
-                    "role": "user",
-                    "content": [
-                        {
-                            "type": "input_image",
-                            "image_url": data_uri,
-                            "detail": "auto",
-                        },
-                    ],
-                }
-            )
         return items
 
     def _build_tenant_system_message(self, tenant_id: TenantID) -> str:
@@ -396,10 +444,7 @@ def _postprocess(
 
         output_text = response.output_text or ""
         if not output_text:
-            output_text = (
-                "Sorry, something went wrong and I couldn't generate a response. "
-                "Please send your message again to retry."
-            )
+            output_text = EMPTY_RESPONSE_MESSAGE
             logger.error(
                 "Empty output_text for response %s (status=%s), returning error message",
                 response.id,

@@ -11,7 +11,7 @@
 
 import logging
 
-from agent_framework import Skill
+from agent_framework import InlineSkill, Skill, SkillFrontmatter
 
 from config.tenant_config import (
     TenantID,
@@ -105,10 +105,12 @@ def create_tenant_skills() -> list[Skill]:
             logger.warning("Skipping skill %s: no content", skill_name)
             continue
         skills.append(
-            Skill(
-                name=skill_name,
-                description=description,
-                content=content,
+            InlineSkill(
+                frontmatter=SkillFrontmatter(
+                    name=skill_name,
+                    description=description,
+                ),
+                instructions=content,
             )
         )
         logger.info("Created skill: %s (tenant=%s)", skill_name, tenant_id.value)