From 7b57c3e792d18576bbb4b686b82d4d8f89e5ba6d Mon Sep 17 00:00:00 2001 From: huangzhaobo Date: Sun, 15 Mar 2026 02:12:20 +0800 Subject: [PATCH 1/3] HADOOP-19838. Support parsing environment variables and system properties in the Configuration class. --- hadoop-common-project/hadoop-common/pom.xml | 5 + .../org/apache/hadoop/conf/Configuration.java | 114 ++++++++++++++++++ .../src/site/markdown/UnixShellGuide.md | 12 +- .../apache/hadoop/conf/TestConfiguration.java | 36 ++++++ hadoop-project/pom.xml | 10 +- 5 files changed, 175 insertions(+), 2 deletions(-) diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index 35ac06c826c52..3a6a396e1a118 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -390,6 +390,11 @@ junit-platform-launcher test + + com.github.stefanbirkner + system-lambda + test + org.glassfish.jaxb jaxb-runtime diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java index 2e112eed8c893..9620af41a8afc 100755 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java @@ -245,6 +245,9 @@ public class Configuration implements Iterable>, private boolean restrictSystemProps = restrictSystemPropsDefault; private boolean allowNullValueProperties = false; + private static final String HADOOP_CLIENT_OPTS = "HADOOP_CLIENT_OPTS"; + private static final String HADOOP_PROPERTY_PREFIX = "hadoop.property."; + private static class Resource { private final Object resource; private final String name; @@ -2934,6 +2937,7 @@ protected synchronized Properties getProps() { if (properties == null) { properties = new Properties(); loadProps(properties, 0, true); + loadOtherProps(properties); } 
return properties; } @@ -2967,6 +2971,116 @@ private synchronized void loadProps(final Properties props, } } + /** + * Loads configuration properties from both environment variables and Java system properties. + *

+ * Java system properties (-D) take precedence over environment variables. + * To ensure this overriding behavior, environment variables are loaded first, + * followed by system properties. + * + * @param properties the object containing the loaded properties. + */ + private void loadOtherProps(Properties properties) { + // Load environment variables first (lower precedence) + loadEnvironmentVariables(properties); + // Load system properties (higher precedence, may override) + loadSystemProperties(properties); + } + + /** + * Loads Hadoop configuration properties from the "HADOOP_CLIENT_OPTS" environment variable. + *

+ * This method extracts {@code -Dkey=value or -D key=value} style strings from the value of the + * {@code HADOOP_CLIENT_OPTS} environment variable, enabling override of XML configuration values + * (e.g., {@code hdfs-site.xml}, {@code core-site.xml}). + *

+ * Example:
+ * Given {@code hdfs-site.xml}: + *

{@code
+   * 
+   *   dfs.client.socket-timeout
+   *   60000
+   * 
+   * }
+ * Setting the environment variable: + *
{@code export HADOOP_CLIENT_OPTS="-Xmx1g -Ddfs.client.socket-timeout=30000"}
+ * will extract {@code dfs.client.socket-timeout=30000} and override the XML config at runtime. + *

+ * Processing Rules: + *

    + *
  • {@code -Dkey=value} format.
  • + *
  • {@code -D key=value} format.
  • + *
+ * + * @param properties the object containing the loaded properties. + */ + private void loadEnvironmentVariables(Properties properties) { + // Retrieve the HADOOP_CLIENT_OPTS environment variable + String hadoopClientOpts = System.getenv(HADOOP_CLIENT_OPTS); + // Skip if variable is not set or empty + if (hadoopClientOpts == null || hadoopClientOpts.trim().isEmpty()) { + return; + } + // Split by whitespace (handles multiple spaces/tabs) + String[] tokens = hadoopClientOpts.trim().split("\\s+"); + for (int i = 0; i < tokens.length; i++) { + String token = tokens[i]; + if (token == null) { + continue; + } + String keyValueStr = null; + if (token.equals("-D")) { + if (i < tokens.length - 1 && tokens[i + 1].contains("=")) { + keyValueStr = tokens[++i]; + } + } else if (token.startsWith("-D") && token.contains("=")) { + keyValueStr = token; + } + if (keyValueStr != null) { + String rawPair = keyValueStr.startsWith("-D") ? keyValueStr.substring(2) : keyValueStr; + int eqIndex = rawPair.indexOf('='); + String key = rawPair.substring(0, eqIndex); + String value = rawPair.substring(eqIndex + 1).trim(); + loadProperty(properties, "env", key, value, false, new String[] {"env-property"}); + } + } + } + + /** + * Loads Hadoop configuration properties from Java system properties. + *

+ * This method enables overriding values defined in XML configuration files (e.g., {@code hdfs-site.xml}, + * {@code core-site.xml}) via JVM system properties, with in-memory values taking precedence. + *

+ * Naming Rule: System properties must start with {@link #HADOOP_PROPERTY_PREFIX}. + * The substring after the prefix is used as the actual configuration key. + *

+ * Example:
+ * Given {@code hdfs-site.xml}: + *

{@code
+   * 
+   *   dfs.client.socket-timeout
+   *   60000
+   * 
+   * }
+ * Launching the application with: + *
{@code java -Dhadoop.property.dfs.client.socket-timeout=30000 -jar application.jar}
+ * will override the XML config, making the runtime value {@code 30000}. + * + * @param properties the object containing the loaded properties. + */ + private void loadSystemProperties(Properties properties) { + Properties systemProperties = System.getProperties(); + for (Map.Entry item : systemProperties.entrySet()) { + String key = (String) item.getKey(); + if (key.startsWith(HADOOP_PROPERTY_PREFIX)) { + String attr = key.substring(HADOOP_PROPERTY_PREFIX.length()); + String value = (String) item.getValue(); + loadProperty(properties, "system", attr, value, false, new String[] {"system-property"}); + } + } + } + /** * Return the number of keys in the configuration. * diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/UnixShellGuide.md b/hadoop-common-project/hadoop-common/src/site/markdown/UnixShellGuide.md index ca32fd8ee2f95..80c6252d8208a 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/UnixShellGuide.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/UnixShellGuide.md @@ -26,8 +26,18 @@ Apache Hadoop has many environment variables that control various aspects of the This environment variable is used for all end-user, non-daemon operations. It can be used to set any Java options as well as any Apache Hadoop options via a system property definition. For example: +Set the client socket timeout via dfs.client.socket-timeout ```bash -HADOOP_CLIENT_OPTS="-Xmx1g -Dhadoop.socks.server=localhost:4000" hadoop fs -ls /tmp +export HADOOP_CLIENT_OPTS="-Xmx1g -Ddfs.client.socket-timeout=30000" + +hadoop fs -ls hdfs://localhost:8020/tmp +``` + +Define the default filesystem via fs.defaultFS +```bash +export HADOOP_CLIENT_OPTS="-Xmx1g -Dfs.defaultFS=hdfs://localhost:8020/" + +hadoop fs -ls /tmp ``` will increase the memory and send this command via a SOCKS proxy server. 
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java index 56ec25ebd8da8..e6006e243fe69 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java @@ -52,6 +52,8 @@ import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; + +import com.github.stefanbirkner.systemlambda.SystemLambda; import org.assertj.core.api.Assertions; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -62,6 +64,7 @@ import static org.apache.hadoop.conf.StorageUnit.KB; import static org.apache.hadoop.conf.StorageUnit.MB; import static org.apache.hadoop.conf.StorageUnit.TB; +import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -2764,4 +2767,37 @@ public void testConcurrentModificationDuringIteration() throws InterruptedExcept assertFalse(exceptionOccurred.get(), "ConcurrentModificationException occurred"); } + + @Test + public void testLoadEnvironmentVariables() throws Exception { + // Test valid configurations: both "-Dkey=value" and "-D key=value" formats + SystemLambda.withEnvironmentVariable("HADOOP_CLIENT_OPTS", + "-Ddfs.client.socket-timeout=30000 -D dfs.datanode.socket.write.timeout=240000") + .execute(() -> { + Configuration conf = new Configuration(); + assertThat(conf.get("dfs.client.socket-timeout")).isEqualTo("30000"); + assertThat(conf.get("dfs.datanode.socket.write.timeout")).isEqualTo("240000"); + }); + // Test invalid configurations: "-X" formats should be ignored + 
SystemLambda.withEnvironmentVariable("HADOOP_CLIENT_OPTS", + "-Xdfs.client.socket-timeout=30000 -X dfs.datanode.socket.write.timeout=240000") + .execute(() -> { + Configuration conf = new Configuration(); + assertThat(conf.get("dfs.client.socket-timeout")).isNull(); + assertThat(conf.get("dfs.datanode.socket.write.timeout")).isNull(); + }); + } + + @Test + public void testLoadSystemProperties() { + Configuration conf = new Configuration(); + assertThat(conf.get("dfs.client.socket-timeout")).isNull(); + // Set valid property with correct prefix + System.setProperty("hadoop.property.dfs.client.socket-timeout", "30000"); + // Set invalid property without correct prefix (should be ignored) + System.setProperty("invalid.hadoop.property.dfs.datanode.socket.write.timeout", "240000"); + conf = new Configuration(); + assertThat(conf.get("dfs.client.socket-timeout")).isEqualTo("30000"); + assertThat(conf.get("dfs.datanode.socket.write.timeout")).isNull(); + } } diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 15c3df947c33d..42d8ef32f819c 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -240,6 +240,8 @@ v1.22.5 1.10.13 1.20 + 1.18.0 + 1.2.1 @@ -1212,7 +1214,7 @@ com.github.stefanbirkner system-rules - 1.18.0 + ${system-rules.version} junit @@ -1224,6 +1226,12 @@ + + com.github.stefanbirkner + system-lambda + ${system-lambda.version} + test + org.apache.commons commons-collections4 From ac77d350f9b9f655115801ce45f67149667f5221 Mon Sep 17 00:00:00 2001 From: huangzhaobo Date: Sun, 15 Mar 2026 20:38:05 +0800 Subject: [PATCH 2/3] fix style --- .../org/apache/hadoop/conf/Configuration.java | 48 +++++++------------ .../src/site/markdown/UnixShellGuide.md | 4 +- .../apache/hadoop/conf/TestConfiguration.java | 26 +++++----- hadoop-project/pom.xml | 2 +- 4 files changed, 34 insertions(+), 46 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java index 9620af41a8afc..dc8419fae7efc 100755 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java @@ -2978,30 +2978,23 @@ private synchronized void loadProps(final Properties props, * To ensure this overriding behavior, environment variables are loaded first, * followed by system properties. * - * @param properties the object containing the loaded properties. + * @param props the object containing the loaded properties. */ - private void loadOtherProps(Properties properties) { + private void loadOtherProps(Properties props) { // Load environment variables first (lower precedence) - loadEnvironmentVariables(properties); + loadEnvironmentVariables(props); // Load system properties (higher precedence, may override) - loadSystemProperties(properties); + loadSystemProperties(props); } /** * Loads Hadoop configuration properties from the "HADOOP_CLIENT_OPTS" environment variable. *

- * This method extracts {@code -Dkey=value or -D key=value} style strings from the value of the - * {@code HADOOP_CLIENT_OPTS} environment variable, enabling override of XML configuration values + * This method extracts {@code -Dkey=value or -D key=value} style strings from the value of the + * {@code HADOOP_CLIENT_OPTS} environment variable, enabling override of XML configuration values * (e.g., {@code hdfs-site.xml}, {@code core-site.xml}). *

- * Example:
- * Given {@code hdfs-site.xml}: - *

{@code
-   * 
-   *   dfs.client.socket-timeout
-   *   60000
-   * 
-   * }
+ * Example: Given {@code hdfs-site.xml}: key: dfs.client.socket-timeout, value: 60000, * Setting the environment variable: *
{@code export HADOOP_CLIENT_OPTS="-Xmx1g -Ddfs.client.socket-timeout=30000"}
* will extract {@code dfs.client.socket-timeout=30000} and override the XML config at runtime. @@ -3015,13 +3008,10 @@ private void loadOtherProps(Properties properties) { * @param properties the object containing the loaded properties. */ private void loadEnvironmentVariables(Properties properties) { - // Retrieve the HADOOP_CLIENT_OPTS environment variable String hadoopClientOpts = System.getenv(HADOOP_CLIENT_OPTS); - // Skip if variable is not set or empty if (hadoopClientOpts == null || hadoopClientOpts.trim().isEmpty()) { return; } - // Split by whitespace (handles multiple spaces/tabs) String[] tokens = hadoopClientOpts.trim().split("\\s+"); for (int i = 0; i < tokens.length; i++) { String token = tokens[i]; @@ -3049,34 +3039,28 @@ private void loadEnvironmentVariables(Properties properties) { /** * Loads Hadoop configuration properties from Java system properties. *

- * This method enables overriding values defined in XML configuration files (e.g., {@code hdfs-site.xml}, - * {@code core-site.xml}) via JVM system properties, with in-memory values taking precedence. + * This method enables overriding values defined in XML configuration files + * (e.g., {@code hdfs-site.xml}, {@code core-site.xml}) via JVM system properties, + * with in-memory values taking precedence. *

- * Naming Rule: System properties must start with {@link #HADOOP_PROPERTY_PREFIX}. + * Naming Rule: System properties must start with {@link #HADOOP_PROPERTY_PREFIX}. * The substring after the prefix is used as the actual configuration key. *

- * Example:
- * Given {@code hdfs-site.xml}: - *

{@code
-   * 
-   *   dfs.client.socket-timeout
-   *   60000
-   * 
-   * }
+ * Example: Given {@code hdfs-site.xml}: key: dfs.client.socket-timeout, value: 60000, * Launching the application with: *
{@code java -Dhadoop.property.dfs.client.socket-timeout=30000 -jar application.jar}
- * will override the XML config, making the runtime value {@code 30000}. + * will extract {@code dfs.client.socket-timeout=30000} and override the XML config at runtime. * - * @param properties the object containing the loaded properties. + * @param props the object containing the loaded properties. */ - private void loadSystemProperties(Properties properties) { + private void loadSystemProperties(Properties props) { Properties systemProperties = System.getProperties(); for (Map.Entry item : systemProperties.entrySet()) { String key = (String) item.getKey(); if (key.startsWith(HADOOP_PROPERTY_PREFIX)) { String attr = key.substring(HADOOP_PROPERTY_PREFIX.length()); String value = (String) item.getValue(); - loadProperty(properties, "system", attr, value, false, new String[] {"system-property"}); + loadProperty(props, "system", attr, value, false, new String[] {"system-property"}); } } } diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/UnixShellGuide.md b/hadoop-common-project/hadoop-common/src/site/markdown/UnixShellGuide.md index 80c6252d8208a..7bb2aaaf84b23 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/UnixShellGuide.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/UnixShellGuide.md @@ -28,14 +28,14 @@ This environment variable is used for all end-user, non-daemon operations. 
It c Set the client socket timeout via dfs.client.socket-timeout ```bash -export HADOOP_CLIENT_OPTS="-Xmx1g -Ddfs.client.socket-timeout=30000" +export HADOOP_CLIENT_OPTS="-Xmx1g -Ddfs.client.socket-timeout=30000" hadoop fs -ls hdfs://localhost:8020/tmp ``` Define the default filesystem via fs.defaultFS ```bash -export HADOOP_CLIENT_OPTS="-Xmx1g -Dfs.defaultFS=hdfs://localhost:8020/" +export HADOOP_CLIENT_OPTS="-Xmx1g -Dfs.defaultFS=hdfs://localhost:8020/" hadoop fs -ls /tmp ``` diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java index e6006e243fe69..c319f8d930ca2 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java @@ -2774,30 +2774,34 @@ public void testLoadEnvironmentVariables() throws Exception { SystemLambda.withEnvironmentVariable("HADOOP_CLIENT_OPTS", "-Ddfs.client.socket-timeout=30000 -D dfs.datanode.socket.write.timeout=240000") .execute(() -> { - Configuration conf = new Configuration(); - assertThat(conf.get("dfs.client.socket-timeout")).isEqualTo("30000"); - assertThat(conf.get("dfs.datanode.socket.write.timeout")).isEqualTo("240000"); + Configuration configuration = new Configuration(); + assertThat(configuration.get("dfs.client.socket-timeout")).isEqualTo("30000"); + assertThat(configuration.get("dfs.datanode.socket.write.timeout")).isEqualTo("240000"); }); // Test invalid configurations: "-X" formats should be ignored SystemLambda.withEnvironmentVariable("HADOOP_CLIENT_OPTS", "-Xdfs.client.socket-timeout=30000 -X dfs.datanode.socket.write.timeout=240000") .execute(() -> { - Configuration conf = new Configuration(); - assertThat(conf.get("dfs.client.socket-timeout")).isNull(); - assertThat(conf.get("dfs.datanode.socket.write.timeout")).isNull(); 
+ Configuration configuration = new Configuration(); + assertThat(configuration.get("dfs.client.socket-timeout")).isNull(); + assertThat(configuration.get("dfs.datanode.socket.write.timeout")).isNull(); }); + // There are no environment variables + Configuration configuration = new Configuration(); + assertThat(configuration.get("dfs.client.socket-timeout")).isNull(); + assertThat(configuration.get("dfs.datanode.socket.write.timeout")).isNull(); } @Test public void testLoadSystemProperties() { - Configuration conf = new Configuration(); - assertThat(conf.get("dfs.client.socket-timeout")).isNull(); + Configuration configuration = new Configuration(); + assertThat(configuration.get("dfs.client.socket-timeout")).isNull(); // Set valid property with correct prefix System.setProperty("hadoop.property.dfs.client.socket-timeout", "30000"); // Set invalid property without correct prefix (should be ignored) System.setProperty("invalid.hadoop.property.dfs.datanode.socket.write.timeout", "240000"); - conf = new Configuration(); - assertThat(conf.get("dfs.client.socket-timeout")).isEqualTo("30000"); - assertThat(conf.get("dfs.datanode.socket.write.timeout")).isNull(); + configuration = new Configuration(); + assertThat(configuration.get("dfs.client.socket-timeout")).isEqualTo("30000"); + assertThat(configuration.get("dfs.datanode.socket.write.timeout")).isNull(); } } diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 42d8ef32f819c..343475e5d88a7 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -241,7 +241,7 @@ 1.10.13 1.20 1.18.0 - 1.2.1 + 1.2.1 From 54e9dc4b42ae5f1eb1c0a16ae302f5970491f250 Mon Sep 17 00:00:00 2001 From: huangzhaobo Date: Mon, 16 Mar 2026 00:37:57 +0800 Subject: [PATCH 3/3] fix style --- .../org/apache/hadoop/conf/Configuration.java | 33 ++++++++++--------- .../hadoop/fs/CommonConfigurationKeys.java | 8 +++++ .../src/main/resources/core-default.xml | 10 ++++++ .../apache/hadoop/conf/TestConfiguration.java | 15 +++++++++ 4 
files changed, 50 insertions(+), 16 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java index dc8419fae7efc..c964faa30e018 100755 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java @@ -2981,9 +2981,7 @@ private synchronized void loadProps(final Properties props, * @param props the object containing the loaded properties. */ private void loadOtherProps(Properties props) { - // Load environment variables first (lower precedence) loadEnvironmentVariables(props); - // Load system properties (higher precedence, may override) loadSystemProperties(props); } @@ -2991,23 +2989,22 @@ private void loadOtherProps(Properties props) { * Loads Hadoop configuration properties from the "HADOOP_CLIENT_OPTS" environment variable. *

* This method extracts {@code -Dkey=value or -D key=value} style strings from the value of the - * {@code HADOOP_CLIENT_OPTS} environment variable, enabling override of XML configuration values - * (e.g., {@code hdfs-site.xml}, {@code core-site.xml}). + * {@code HADOOP_CLIENT_OPTS} environment variable. *

* Example: Given {@code hdfs-site.xml}: key: dfs.client.socket-timeout, value: 60000, * Setting the environment variable: *

{@code export HADOOP_CLIENT_OPTS="-Xmx1g -Ddfs.client.socket-timeout=30000"}
- * will extract {@code dfs.client.socket-timeout=30000} and override the XML config at runtime. + * will extract {@code dfs.client.socket-timeout=30000} and override the XML files at runtime. *

- * Processing Rules: + * String format: *

    - *
  • {@code -Dkey=value} format.
  • - *
  • {@code -D key=value} format.
  • + *
  • {@code -Dkey=value}
  • + *
  • {@code -D key=value}
  • *
* - * @param properties the object containing the loaded properties. + * @param props the object containing the loaded properties. */ - private void loadEnvironmentVariables(Properties properties) { + private void loadEnvironmentVariables(Properties props) { String hadoopClientOpts = System.getenv(HADOOP_CLIENT_OPTS); if (hadoopClientOpts == null || hadoopClientOpts.trim().isEmpty()) { return; @@ -3031,7 +3028,7 @@ private void loadEnvironmentVariables(Properties properties) { int eqIndex = rawPair.indexOf('='); String key = rawPair.substring(0, eqIndex); String value = rawPair.substring(eqIndex + 1).trim(); - loadProperty(properties, "env", key, value, false, new String[] {"env-property"}); + loadProperty(props, "env", key, value, false, new String[] {"env-property"}); } } } @@ -3039,21 +3036,25 @@ private void loadEnvironmentVariables(Properties properties) { /** * Loads Hadoop configuration properties from Java system properties. *

- * This method enables overriding values defined in XML configuration files - * (e.g., {@code hdfs-site.xml}, {@code core-site.xml}) via JVM system properties, - * with in-memory values taking precedence. + * This feature is disabled by default and must be explicitly enabled via the configuration key + * {@link CommonConfigurationKeys#HADOOP_CONF_LOAD_SYSTEM_PROPERTIES_ENABLED}. *
- * This method enables overriding values defined in XML configuration files - * (e.g., {@code hdfs-site.xml}, {@code core-site.xml}) via JVM system properties, - * with in-memory values taking precedence. + * This feature is disabled by default and must be explicitly enabled via configuration, + * Key: {@link CommonConfigurationKeys#HADOOP_CONF_LOAD_SYSTEM_PROPERTIES_ENABLED}. *

- * Naming Rule: System properties must start with {@link #HADOOP_PROPERTY_PREFIX}. + * Naming Rule: Java system properties must start with {@link #HADOOP_PROPERTY_PREFIX}. * The substring after the prefix is used as the actual configuration key. *

* Example: Given {@code hdfs-site.xml}: key: dfs.client.socket-timeout, value: 60000, * Launching the application with: *

{@code java -Dhadoop.property.dfs.client.socket-timeout=30000 -jar application.jar}
- * will extract {@code dfs.client.socket-timeout=30000} and override the XML config at runtime. + * will extract {@code dfs.client.socket-timeout=30000} and override the XML files at runtime. * * @param props the object containing the loaded properties. */ private void loadSystemProperties(Properties props) { + String loadSystemPropsEnabled = + props.getProperty(CommonConfigurationKeys.HADOOP_CONF_LOAD_SYSTEM_PROPERTIES_ENABLED); + if (!Boolean.parseBoolean(loadSystemPropsEnabled)) { + return; + } Properties systemProperties = System.getProperties(); for (Map.Entry item : systemProperties.entrySet()) { String key = (String) item.getKey(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java index f58331baa81a1..2bb76b663dfe0 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java @@ -516,4 +516,12 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic { * {@value}. */ public static final String LOCAL_FS_VERIFY_CHECKSUM = "fs.file.checksum.verify"; + + /** + * Loads system properties switch, default is false. + *

+ * {@value}. + */ + public static final String HADOOP_CONF_LOAD_SYSTEM_PROPERTIES_ENABLED = + "hadoop.conf.load.system.properties.enabled"; } diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index 9859b8b94a4f8..a7f88411293a4 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -880,6 +880,16 @@ + + hadoop.conf.load.system.properties.enabled + false + + Controls whether Hadoop configuration properties are loaded from Java system properties. + When enabled, this setting allows you to override values in XML files in-memory at runtime. + Only Java system properties prefixed with "hadoop.property." will be recognized and loaded. + + + io.file.buffer.size diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java index c319f8d930ca2..1ebe49498ad17 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java @@ -51,6 +51,8 @@ import static java.util.concurrent.TimeUnit.*; import com.fasterxml.jackson.databind.ObjectMapper; + +import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import com.github.stefanbirkner.systemlambda.SystemLambda; @@ -2796,11 +2798,24 @@ public void testLoadEnvironmentVariables() throws Exception { public void testLoadSystemProperties() { Configuration configuration = new Configuration(); assertThat(configuration.get("dfs.client.socket-timeout")).isNull(); + assertThat(configuration.get("dfs.datanode.socket.write.timeout")).isNull(); + configuration = new Configuration(); + 
configuration.setBoolean(CommonConfigurationKeys.HADOOP_CONF_LOAD_SYSTEM_PROPERTIES_ENABLED, + false); + assertThat(configuration.get("dfs.client.socket-timeout")).isNull(); + assertThat(configuration.get("dfs.datanode.socket.write.timeout")).isNull(); + // Start loads system properties switch + configuration = new Configuration(); + configuration.setBoolean(CommonConfigurationKeys.HADOOP_CONF_LOAD_SYSTEM_PROPERTIES_ENABLED, + true); + assertThat(configuration.get("dfs.client.socket-timeout")).isNull(); // Set valid property with correct prefix System.setProperty("hadoop.property.dfs.client.socket-timeout", "30000"); // Set invalid property without correct prefix (should be ignored) System.setProperty("invalid.hadoop.property.dfs.datanode.socket.write.timeout", "240000"); configuration = new Configuration(); + configuration.setBoolean(CommonConfigurationKeys.HADOOP_CONF_LOAD_SYSTEM_PROPERTIES_ENABLED, + true); assertThat(configuration.get("dfs.client.socket-timeout")).isEqualTo("30000"); assertThat(configuration.get("dfs.datanode.socket.write.timeout")).isNull(); }