diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index 35ac06c826c52..3a6a396e1a118 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -390,6 +390,11 @@ junit-platform-launcher test + + com.github.stefanbirkner + system-lambda + test + org.glassfish.jaxb jaxb-runtime diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java index 2e112eed8c893..c964faa30e018 100755 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java @@ -245,6 +245,9 @@ public class Configuration implements Iterable>, private boolean restrictSystemProps = restrictSystemPropsDefault; private boolean allowNullValueProperties = false; + private static final String HADOOP_CLIENT_OPTS = "HADOOP_CLIENT_OPTS"; + private static final String HADOOP_PROPERTY_PREFIX = "hadoop.property."; + private static class Resource { private final Object resource; private final String name; @@ -2934,6 +2937,7 @@ protected synchronized Properties getProps() { if (properties == null) { properties = new Properties(); loadProps(properties, 0, true); + loadOtherProps(properties); } return properties; } @@ -2967,6 +2971,101 @@ private synchronized void loadProps(final Properties props, } } + /** + * Loads configuration properties from both environment variables and Java system properties. + *

+ * Java system properties (-D) take precedence over environment variables. + * To ensure this overriding behavior, environment variables are loaded first, + * followed by system properties. + * + * @param props the object containing the loaded properties. + */ + private void loadOtherProps(Properties props) { + loadEnvironmentVariables(props); + loadSystemProperties(props); + } + + /** + * Loads Hadoop configuration properties from the "HADOOP_CLIENT_OPTS" environment variable. + *

+ * This method extracts {@code -Dkey=value} or {@code -D key=value} style definitions from the value
+ * of the {@code HADOOP_CLIENT_OPTS} environment variable.
+ *

+ * Example: Given an {@code hdfs-site.xml} entry with key {@code dfs.client.socket-timeout} and
+ * value {@code 60000}, setting the environment variable:
+ *

{@code export HADOOP_CLIENT_OPTS="-Xmx1g -Ddfs.client.socket-timeout=30000"}
+ * will extract {@code dfs.client.socket-timeout=30000} and override the XML files at runtime. + *

+ * String format: whitespace-separated JVM-style options; only {@code -Dkey=value} and
+ * {@code -D key=value} tokens are parsed, and all other tokens (e.g. {@code -Xmx1g}) are ignored.
+ *

+ * + * @param props the object containing the loaded properties. + */ + private void loadEnvironmentVariables(Properties props) { + String hadoopClientOpts = System.getenv(HADOOP_CLIENT_OPTS); + if (hadoopClientOpts == null || hadoopClientOpts.trim().isEmpty()) { + return; + } + String[] tokens = hadoopClientOpts.trim().split("\\s+"); + for (int i = 0; i < tokens.length; i++) { + String token = tokens[i]; + if (token == null) { + continue; + } + String keyValueStr = null; + if (token.equals("-D")) { + if (i < tokens.length - 1 && tokens[i + 1].contains("=")) { + keyValueStr = tokens[++i]; + } + } else if (token.startsWith("-D") && token.contains("=")) { + keyValueStr = token; + } + if (keyValueStr != null) { + String rawPair = keyValueStr.startsWith("-D") ? keyValueStr.substring(2) : keyValueStr; + int eqIndex = rawPair.indexOf('='); + String key = rawPair.substring(0, eqIndex); + String value = rawPair.substring(eqIndex + 1).trim(); + loadProperty(props, "env", key, value, false, new String[] {"env-property"}); + } + } + } + + /** + * Loads Hadoop configuration properties from Java system properties. + *

+ * This feature is disabled by default and must be explicitly enabled via the configuration key
+ * {@link CommonConfigurationKeys#HADOOP_CONF_LOAD_SYSTEM_PROPERTIES_ENABLED}.
+ *

+ * Naming rule: Java system property names must start with {@link #HADOOP_PROPERTY_PREFIX};
+ * the substring after the prefix is used as the actual configuration key.
+ *

+ * Example: Given an {@code hdfs-site.xml} entry with key {@code dfs.client.socket-timeout} and
+ * value {@code 60000}, launching the application with:
+ *

{@code java -Dhadoop.property.dfs.client.socket-timeout=30000 -jar application.jar}
+ * will extract {@code dfs.client.socket-timeout=30000} and override the XML files at runtime. + * + * @param props the object containing the loaded properties. + */ + private void loadSystemProperties(Properties props) { + String loadSystemPropsEnabled = + props.getProperty(CommonConfigurationKeys.HADOOP_CONF_LOAD_SYSTEM_PROPERTIES_ENABLED); + if (!Boolean.parseBoolean(loadSystemPropsEnabled)) { + return; + } + Properties systemProperties = System.getProperties(); + for (Map.Entry item : systemProperties.entrySet()) { + String key = (String) item.getKey(); + if (key.startsWith(HADOOP_PROPERTY_PREFIX)) { + String attr = key.substring(HADOOP_PROPERTY_PREFIX.length()); + String value = (String) item.getValue(); + loadProperty(props, "system", attr, value, false, new String[] {"system-property"}); + } + } + } + /** * Return the number of keys in the configuration. * diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java index f58331baa81a1..2bb76b663dfe0 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java @@ -516,4 +516,12 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic { * {@value}. */ public static final String LOCAL_FS_VERIFY_CHECKSUM = "fs.file.checksum.verify"; + + /** + * Loads system properties switch, default is false. + *

+ * {@value}. + */ + public static final String HADOOP_CONF_LOAD_SYSTEM_PROPERTIES_ENABLED = + "hadoop.conf.load.system.properties.enabled"; } diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index 9859b8b94a4f8..a7f88411293a4 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -880,6 +880,16 @@ + + hadoop.conf.load.system.properties.enabled + false + + Controls whether Hadoop configuration properties are loaded from Java system properties. + When enabled, this setting allows you to override values in XML files in-memory at runtime. + Only Java system properties prefixed with "hadoop.property." will be recognized and loaded. + + + io.file.buffer.size diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/UnixShellGuide.md b/hadoop-common-project/hadoop-common/src/site/markdown/UnixShellGuide.md index ca32fd8ee2f95..7bb2aaaf84b23 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/UnixShellGuide.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/UnixShellGuide.md @@ -26,8 +26,18 @@ Apache Hadoop has many environment variables that control various aspects of the This environment variable is used for all end-user, non-daemon operations. It can be used to set any Java options as well as any Apache Hadoop options via a system property definition. 
For example: +Set the client socket timeout via dfs.client.socket-timeout ```bash -HADOOP_CLIENT_OPTS="-Xmx1g -Dhadoop.socks.server=localhost:4000" hadoop fs -ls /tmp +export HADOOP_CLIENT_OPTS="-Xmx1g -Ddfs.client.socket-timeout=30000" + +hadoop fs -ls hdfs://localhost:8020/tmp +``` + +Define the default filesystem via fs.defaultFS +```bash +export HADOOP_CLIENT_OPTS="-Xmx1g -Dfs.defaultFS=hdfs://localhost:8020/" + +hadoop fs -ls /tmp ``` will increase the memory and send this command via a SOCKS proxy server. diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java index 56ec25ebd8da8..1ebe49498ad17 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java @@ -51,7 +51,11 @@ import static java.util.concurrent.TimeUnit.*; import com.fasterxml.jackson.databind.ObjectMapper; + +import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; + +import com.github.stefanbirkner.systemlambda.SystemLambda; import org.assertj.core.api.Assertions; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -62,6 +66,7 @@ import static org.apache.hadoop.conf.StorageUnit.KB; import static org.apache.hadoop.conf.StorageUnit.MB; import static org.apache.hadoop.conf.StorageUnit.TB; +import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -2764,4 +2769,54 @@ public void testConcurrentModificationDuringIteration() throws InterruptedExcept assertFalse(exceptionOccurred.get(), "ConcurrentModificationException occurred"); } + 
+ @Test + public void testLoadEnvironmentVariables() throws Exception { + // Test valid configurations: both "-Dkey=value" and "-D key=value" formats + SystemLambda.withEnvironmentVariable("HADOOP_CLIENT_OPTS", + "-Ddfs.client.socket-timeout=30000 -D dfs.datanode.socket.write.timeout=240000") + .execute(() -> { + Configuration configuration = new Configuration(); + assertThat(configuration.get("dfs.client.socket-timeout")).isEqualTo("30000"); + assertThat(configuration.get("dfs.datanode.socket.write.timeout")).isEqualTo("240000"); + }); + // Test invalid configurations: "-X" formats should be ignored + SystemLambda.withEnvironmentVariable("HADOOP_CLIENT_OPTS", + "-Xdfs.client.socket-timeout=30000 -X dfs.datanode.socket.write.timeout=240000") + .execute(() -> { + Configuration configuration = new Configuration(); + assertThat(configuration.get("dfs.client.socket-timeout")).isNull(); + assertThat(configuration.get("dfs.datanode.socket.write.timeout")).isNull(); + }); + // There are no environment variables + Configuration configuration = new Configuration(); + assertThat(configuration.get("dfs.client.socket-timeout")).isNull(); + assertThat(configuration.get("dfs.datanode.socket.write.timeout")).isNull(); + } + + @Test + public void testLoadSystemProperties() { + Configuration configuration = new Configuration(); + assertThat(configuration.get("dfs.client.socket-timeout")).isNull(); + assertThat(configuration.get("dfs.datanode.socket.write.timeout")).isNull(); + configuration = new Configuration(); + configuration.setBoolean(CommonConfigurationKeys.HADOOP_CONF_LOAD_SYSTEM_PROPERTIES_ENABLED, + false); + assertThat(configuration.get("dfs.client.socket-timeout")).isNull(); + assertThat(configuration.get("dfs.datanode.socket.write.timeout")).isNull(); + // Start loads system properties switch + configuration = new Configuration(); + configuration.setBoolean(CommonConfigurationKeys.HADOOP_CONF_LOAD_SYSTEM_PROPERTIES_ENABLED, + true); + 
assertThat(configuration.get("dfs.client.socket-timeout")).isNull(); + // Set valid property with correct prefix + System.setProperty("hadoop.property.dfs.client.socket-timeout", "30000"); + // Set invalid property without correct prefix (should be ignored) + System.setProperty("invalid.hadoop.property.dfs.datanode.socket.write.timeout", "240000"); + configuration = new Configuration(); + configuration.setBoolean(CommonConfigurationKeys.HADOOP_CONF_LOAD_SYSTEM_PROPERTIES_ENABLED, + true); + assertThat(configuration.get("dfs.client.socket-timeout")).isEqualTo("30000"); + assertThat(configuration.get("dfs.datanode.socket.write.timeout")).isNull(); + } } diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 15c3df947c33d..343475e5d88a7 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -240,6 +240,8 @@ v1.22.5 1.10.13 1.20 + 1.18.0 + 1.2.1 @@ -1212,7 +1214,7 @@ com.github.stefanbirkner system-rules - 1.18.0 + ${system-rules.version} junit @@ -1224,6 +1226,12 @@ + + com.github.stefanbirkner + system-lambda + ${system-lambda.version} + test + org.apache.commons commons-collections4