From 3ee560a6bdf87786d178473ef66c78cf45358e34 Mon Sep 17 00:00:00 2001 From: K0K0V0K Date: Tue, 3 Mar 2026 14:11:28 +0100 Subject: [PATCH] YARN-11937. Yarn Proxy Behind a Reverse Proxy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Description When the Yarn Proxy is deployed behind a reverse proxy that is also used in application tracking URLs, the Yarn Proxy should redirect requests to that proxy instead of attempting to proxy them internally. Use Case Consider the following scenario: • A user runs a Spark job. • The Spark UI is hosted in the Spark History Server (SHS). • Multiple SHS instances are deployed for high availability (HA). • The tracking URL points to a Knox Gateway, which routes requests to the available SHS instances. This setup ensures high availability for the tracking UI. If one SHS instance becomes unavailable, another can continue serving the UI. Problem Statement When the Knox Gateway forwards a user’s HTTP request to the Yarn Proxy, the Yarn Proxy attempts to proxy the request back to the Knox Gateway. However, this proxied request does not include the JWT token. As a result, Knox initiates authentication instead of forwarding the request to the appropriate SHS instance. Proposed Solution For security reasons, the JWT token must not be forwarded to the tracking URL. Therefore, when an application registers a tracking URL that includes a specific flag indicating that it is served behind a reverse proxy, the Yarn Proxy should redirect the user directly to the tracking URL instead of attempting to proxy the request internally. 
Config New config was created: yarn.web-proxy.redirect-flag --- .../hadoop/yarn/conf/YarnConfiguration.java | 2 + .../src/main/resources/yarn-default.xml | 9 ++++ .../server/webproxy/WebAppProxyServlet.java | 29 +++++++++++ .../webproxy/TestWebAppProxyServlet.java | 48 +++++++++++++++++++ 4 files changed, 88 insertions(+) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 6871e4b2a219c..6064967ad15d6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -3026,6 +3026,8 @@ public static boolean isAclEnabled(Configuration conf) { public static final String PROXY_BIND_HOST = PROXY_PREFIX + "bind-host"; + public static final String PROXY_REDIRECT_FLAG = PROXY_PREFIX + "redirect-flag"; + /** * YARN Service Level Authorization */ diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 830cade703dee..b128d0f0ef14f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -2832,6 +2832,15 @@ <value>60000</value> </property> + <property> + <description>Optional query parameter name that signals the YARN WebAppProxy to redirect + the user to the application's tracking URL instead of proxying the request. + When the tracking URL contains this flag with the value "true", the proxy + performs an HTTP redirect (302) to the tracking URL.</description> 
+ <name>yarn.web-proxy.redirect-flag</name> + <value></value> + </property> + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/WebAppProxyServlet.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/WebAppProxyServlet.java index 7817362885064..44be1b1513eb9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/WebAppProxyServlet.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/WebAppProxyServlet.java @@ -529,6 +529,35 @@ private void methodAction(final HttpServletRequest req, default: // fall out of the switch } + + /* + * If the application registered its tracking URL with the configured + * redirect flag, the proxy should not attempt + * to fetch the resource itself. Instead, it performs an HTTP redirect + * to the tracking URL. + * + * This is required for deployments where the tracking URL is served + * behind an external reverse proxy (for example Apache Knox) that is + * responsible for routing requests to multiple backend services + * such as Spark History Server instances in an HA setup. + * + * In such environments the YARN WebAppProxy cannot correctly proxy the + * request because the reverse proxy expects the request to originate + * directly from the user's browser and may require authentication + * context (e.g. a JWT) that the YARN proxy must not forward for + * security reasons. + * + * By redirecting the user instead of proxying the request, the browser + * sends a new request to the external reverse proxy which can then + * handle authentication and route the request to the appropriate + * backend service. 
+ */ + String redirectFlagName = conf.get(YarnConfiguration.PROXY_REDIRECT_FLAG, ""); + if (!redirectFlagName.isBlank() && (redirectFlagName + "=true").equals(toFetch.getQuery())) { + ProxyUtils.sendRedirect(req, resp, toFetch.toString()); + return; + } + Cookie c = null; if (userWasWarned && userApproved) { c = makeCheckCookie(id, true); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/test/java/org/apache/hadoop/yarn/server/webproxy/TestWebAppProxyServlet.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/test/java/org/apache/hadoop/yarn/server/webproxy/TestWebAppProxyServlet.java index 49b6a7954ba9d..33f7198e49380 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/test/java/org/apache/hadoop/yarn/server/webproxy/TestWebAppProxyServlet.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/test/java/org/apache/hadoop/yarn/server/webproxy/TestWebAppProxyServlet.java @@ -38,6 +38,7 @@ import javax.servlet.ServletConfig; import javax.servlet.ServletContext; import javax.servlet.ServletException; +import javax.servlet.ServletOutputStream; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; @@ -51,6 +52,7 @@ import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; +import org.mockito.ArgumentCaptor; import org.mockito.Mockito; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -333,6 +335,47 @@ void testWebAppProxyConnectionTimeout() } + @Test + void testRedirectFlagProxyServlet() throws IOException, ServletException { + HttpServletRequest request = mock(HttpServletRequest.class); + when(request.getMethod()).thenReturn("GET"); + when(request.getRemoteUser()).thenReturn("dr.who"); + when(request.getPathInfo()).thenReturn("/application_00_0"); + 
when(request.getHeaderNames()).thenReturn(Collections.emptyEnumeration()); + + HttpServletResponse response = mock(HttpServletResponse.class); + when(response.getOutputStream()).thenReturn(mock(ServletOutputStream.class)); + when(response.getWriter()).thenReturn(mock(PrintWriter.class)); + WebAppProxyServlet servlet = new WebAppProxyServlet(); + ServletConfig config = mock(ServletConfig.class); + ServletContext context = mock(ServletContext.class); + when(config.getServletContext()).thenReturn(context); + AppReportFetcherForTest appReportFetcher = + new AppReportFetcherForTest(new YarnConfiguration()); + servlet.init(config); + when(config.getServletContext() + .getAttribute(WebAppProxy.FETCHER_ATTRIBUTE)) + .thenReturn(appReportFetcher); + + appReportFetcher.answer = 8; + + //Check if flag is on + YarnConfiguration conf = new YarnConfiguration(); + conf.set(YarnConfiguration.PROXY_REDIRECT_FLAG, "yarn_knox_proxy"); + servlet.setConf(conf); + servlet.doGet(request, response); + + //Check if flag is off + conf.set(YarnConfiguration.PROXY_REDIRECT_FLAG, ""); + servlet.setConf(conf); + servlet.doGet(request, response); + + ArgumentCaptor<Integer> statusCaptor = ArgumentCaptor.forClass(Integer.class); + Mockito.verify(response, Mockito.times(2)).setStatus(statusCaptor.capture()); + assertEquals(HttpServletResponse.SC_FOUND, statusCaptor.getAllValues().get(0)); + assertEquals(HttpServletResponse.SC_OK, statusCaptor.getAllValues().get(1)); + } + @Test @Timeout(5000) void testAppReportForEmptyTrackingUrl() throws Exception { @@ -654,6 +697,11 @@ public FetchedAppReport getApplicationReport(ApplicationId appId) result.getApplicationReport().setOriginalTrackingUrl("localhost:" + originalPort + "/foo/timeout?a=b#main"); return result; + } else if (answer == 8) { + FetchedAppReport result = getDefaultApplicationReport(appId); + result.getApplicationReport().setOriginalTrackingUrl("localhost:" + + originalPort + "/foo/bar?yarn_knox_proxy=true"); + return result; } return null; }