Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions hadoop-common-project/hadoop-common/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,11 @@
<artifactId>junit-platform-launcher</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.github.stefanbirkner</groupId>
<artifactId>system-lambda</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.glassfish.jaxb</groupId>
<artifactId>jaxb-runtime</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,9 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
private boolean restrictSystemProps = restrictSystemPropsDefault;
private boolean allowNullValueProperties = false;

private static final String HADOOP_CLIENT_OPTS = "HADOOP_CLIENT_OPTS";
private static final String HADOOP_PROPERTY_PREFIX = "hadoop.property.";

private static class Resource {
private final Object resource;
private final String name;
Expand Down Expand Up @@ -2934,6 +2937,7 @@ protected synchronized Properties getProps() {
if (properties == null) {
properties = new Properties();
loadProps(properties, 0, true);
loadOtherProps(properties);
}
return properties;
}
Expand Down Expand Up @@ -2967,6 +2971,101 @@ private synchronized void loadProps(final Properties props,
}
}

/**
 * Loads extra configuration properties that originate outside of XML resources:
 * values taken from environment variables and from Java system properties.
 * <p>
 * Environment variables are applied first and Java system properties (-D)
 * second, so a system property overrides an environment variable that
 * defines the same key.
 *
 * @param props the properties object to populate.
 */
private void loadOtherProps(Properties props) {
  // Order matters: system properties must win over environment variables.
  loadEnvironmentVariables(props);
  loadSystemProperties(props);
}

/**
 * Loads Hadoop configuration properties from the "HADOOP_CLIENT_OPTS" environment variable.
 * <p>
 * This method extracts {@code -Dkey=value} or {@code -D key=value} style definitions from
 * the value of the {@code HADOOP_CLIENT_OPTS} environment variable. Any other token (for
 * example a JVM option such as {@code -Xmx1g}) is ignored.
 * <p>
 * <b>Example:</b> Given {@code hdfs-site.xml}: key: dfs.client.socket-timeout, value: 60000,
 * Setting the environment variable:
 * <pre>{@code export HADOOP_CLIENT_OPTS="-Xmx1g -Ddfs.client.socket-timeout=30000"}</pre>
 * will extract {@code dfs.client.socket-timeout=30000} and override the XML files at runtime.
 * <p>
 * <b>String format:</b>
 * <ul>
 * <li>{@code -Dkey=value}</li>
 * <li>{@code -D key=value}</li>
 * </ul>
 * Malformed definitions with an empty key (e.g. {@code -D=value}) are skipped.
 *
 * @param props the object containing the loaded properties.
 */
private void loadEnvironmentVariables(Properties props) {
  String hadoopClientOpts = System.getenv(HADOOP_CLIENT_OPTS);
  if (hadoopClientOpts == null || hadoopClientOpts.trim().isEmpty()) {
    return;
  }
  // Splitting the trimmed value on whitespace never yields null or empty tokens.
  String[] tokens = hadoopClientOpts.trim().split("\\s+");
  for (int i = 0; i < tokens.length; i++) {
    String token = tokens[i];
    String keyValueStr = null;
    if (token.equals("-D")) {
      // "-D key=value": consume the next token if it looks like a definition.
      if (i < tokens.length - 1 && tokens[i + 1].contains("=")) {
        keyValueStr = tokens[++i];
      }
    } else if (token.startsWith("-D") && token.contains("=")) {
      // "-Dkey=value" packed into a single token.
      keyValueStr = token;
    }
    if (keyValueStr != null) {
      String rawPair = keyValueStr.startsWith("-D") ? keyValueStr.substring(2) : keyValueStr;
      int eqIndex = rawPair.indexOf('=');
      // Skip definitions with an empty key such as "-D=value"; eqIndex cannot be
      // -1 here because both branches above require the token to contain '='.
      if (eqIndex <= 0) {
        continue;
      }
      String key = rawPair.substring(0, eqIndex);
      String value = rawPair.substring(eqIndex + 1).trim();
      loadProperty(props, "env", key, value, false, new String[] {"env-property"});
    }
  }
}

/**
 * Loads Hadoop configuration properties from Java system properties.
 * <p>
 * This feature is disabled by default and must be explicitly enabled via configuration,
 * Key: {@link CommonConfigurationKeys#HADOOP_CONF_LOAD_SYSTEM_PROPERTIES_ENABLED}.
 * <p>
 * <b>Naming Rule:</b> Java system properties must start with {@link #HADOOP_PROPERTY_PREFIX}.
 * The substring after the prefix is used as the actual configuration key.
 * <p>
 * <b>Example:</b> Given {@code hdfs-site.xml}: key: dfs.client.socket-timeout, value: 60000,
 * Launching the application with:
 * <pre>{@code java -Dhadoop.property.dfs.client.socket-timeout=30000 -jar application.jar}</pre>
 * will extract {@code dfs.client.socket-timeout=30000} and override the XML files at runtime.
 *
 * @param props the object containing the loaded properties.
 */
private void loadSystemProperties(Properties props) {
  String loadSystemPropsEnabled =
      props.getProperty(CommonConfigurationKeys.HADOOP_CONF_LOAD_SYSTEM_PROPERTIES_ENABLED);
  if (!Boolean.parseBoolean(loadSystemPropsEnabled)) {
    return;
  }
  // Iterate over a snapshot of String-valued keys rather than the live entry set:
  // stringPropertyNames() avoids a ClassCastException for non-String entries that
  // other code may have put into the system Properties table, and avoids a
  // ConcurrentModificationException if another thread mutates system properties
  // while we iterate.
  for (String key : System.getProperties().stringPropertyNames()) {
    if (key.startsWith(HADOOP_PROPERTY_PREFIX)) {
      String attr = key.substring(HADOOP_PROPERTY_PREFIX.length());
      String value = System.getProperty(key);
      // The property may have been removed between the snapshot and this read.
      if (value != null) {
        loadProperty(props, "system", attr, value, false, new String[] {"system-property"});
      }
    }
  }
}

/**
* Return the number of keys in the configuration.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -516,4 +516,12 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
* {@value}.
*/
public static final String LOCAL_FS_VERIFY_CHECKSUM = "fs.file.checksum.verify";

/**
 * Whether to load configuration overrides from Java system properties.
 * Disabled by default.
* <p>
* {@value}.
*/
public static final String HADOOP_CONF_LOAD_SYSTEM_PROPERTIES_ENABLED =
"hadoop.conf.load.system.properties.enabled";
}
Original file line number Diff line number Diff line change
Expand Up @@ -880,6 +880,16 @@
</description>
</property>

<property>
<name>hadoop.conf.load.system.properties.enabled</name>
<value>false</value>
<description>
Controls whether Hadoop configuration properties are loaded from Java system properties.
When enabled, this setting allows you to override values in XML files in-memory at runtime.
Only Java system properties prefixed with "hadoop.property." will be recognized and loaded.
</description>
</property>

<!-- i/o properties -->
<property>
<name>io.file.buffer.size</name>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,18 @@ Apache Hadoop has many environment variables that control various aspects of the

This environment variable is used for all end-user, non-daemon operations. It can be used to set any Java options as well as any Apache Hadoop options via a system property definition. For example:

Set the client socket timeout via `dfs.client.socket-timeout`:
```bash
HADOOP_CLIENT_OPTS="-Xmx1g -Dhadoop.socks.server=localhost:4000" hadoop fs -ls /tmp
export HADOOP_CLIENT_OPTS="-Xmx1g -Ddfs.client.socket-timeout=30000"

hadoop fs -ls hdfs://localhost:8020/tmp
```

Define the default filesystem via `fs.defaultFS`:
```bash
export HADOOP_CLIENT_OPTS="-Xmx1g -Dfs.defaultFS=hdfs://localhost:8020/"

hadoop fs -ls /tmp
```

Each example increases the client JVM heap to 1 GB and overrides the named Hadoop configuration property for the duration of the command.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,11 @@
import static java.util.concurrent.TimeUnit.*;

import com.fasterxml.jackson.databind.ObjectMapper;

import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;

import com.github.stefanbirkner.systemlambda.SystemLambda;
import org.assertj.core.api.Assertions;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
Expand All @@ -62,6 +66,7 @@
import static org.apache.hadoop.conf.StorageUnit.KB;
import static org.apache.hadoop.conf.StorageUnit.MB;
import static org.apache.hadoop.conf.StorageUnit.TB;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
Expand Down Expand Up @@ -2764,4 +2769,54 @@ public void testConcurrentModificationDuringIteration() throws InterruptedExcept

assertFalse(exceptionOccurred.get(), "ConcurrentModificationException occurred");
}

@Test
public void testLoadEnvironmentVariables() throws Exception {
  final String timeoutKey = "dfs.client.socket-timeout";
  final String writeTimeoutKey = "dfs.datanode.socket.write.timeout";
  // Both the "-Dkey=value" and the "-D key=value" forms must be recognized.
  SystemLambda.withEnvironmentVariable("HADOOP_CLIENT_OPTS",
      "-Ddfs.client.socket-timeout=30000 -D dfs.datanode.socket.write.timeout=240000")
      .execute(() -> {
        Configuration conf = new Configuration();
        assertThat(conf.get(timeoutKey)).isEqualTo("30000");
        assertThat(conf.get(writeTimeoutKey)).isEqualTo("240000");
      });
  // Tokens that do not start with "-D" must be ignored.
  SystemLambda.withEnvironmentVariable("HADOOP_CLIENT_OPTS",
      "-Xdfs.client.socket-timeout=30000 -X dfs.datanode.socket.write.timeout=240000")
      .execute(() -> {
        Configuration conf = new Configuration();
        assertThat(conf.get(timeoutKey)).isNull();
        assertThat(conf.get(writeTimeoutKey)).isNull();
      });
  // Without HADOOP_CLIENT_OPTS set, nothing is injected at all.
  Configuration conf = new Configuration();
  assertThat(conf.get(timeoutKey)).isNull();
  assertThat(conf.get(writeTimeoutKey)).isNull();
}

@Test
public void testLoadSystemProperties() {
  final String validKey = "hadoop.property.dfs.client.socket-timeout";
  final String invalidKey = "invalid.hadoop.property.dfs.datanode.socket.write.timeout";
  try {
    // Feature disabled (default): nothing is loaded from system properties.
    Configuration configuration = new Configuration();
    assertThat(configuration.get("dfs.client.socket-timeout")).isNull();
    assertThat(configuration.get("dfs.datanode.socket.write.timeout")).isNull();
    // Feature explicitly disabled: still nothing is loaded.
    configuration = new Configuration();
    configuration.setBoolean(CommonConfigurationKeys.HADOOP_CONF_LOAD_SYSTEM_PROPERTIES_ENABLED,
        false);
    assertThat(configuration.get("dfs.client.socket-timeout")).isNull();
    assertThat(configuration.get("dfs.datanode.socket.write.timeout")).isNull();
    // Feature enabled but no matching system property set yet.
    // NOTE(review): the flag is set via setBoolean() after construction while
    // loading happens lazily in getProps() — confirm the flag is actually
    // visible to loadSystemProperties() at load time.
    configuration = new Configuration();
    configuration.setBoolean(CommonConfigurationKeys.HADOOP_CONF_LOAD_SYSTEM_PROPERTIES_ENABLED,
        true);
    assertThat(configuration.get("dfs.client.socket-timeout")).isNull();
    // Set valid property with correct prefix.
    System.setProperty(validKey, "30000");
    // Set invalid property without correct prefix (should be ignored).
    System.setProperty(invalidKey, "240000");
    configuration = new Configuration();
    configuration.setBoolean(CommonConfigurationKeys.HADOOP_CONF_LOAD_SYSTEM_PROPERTIES_ENABLED,
        true);
    assertThat(configuration.get("dfs.client.socket-timeout")).isEqualTo("30000");
    assertThat(configuration.get("dfs.datanode.socket.write.timeout")).isNull();
  } finally {
    // System properties are JVM-global mutable state; always restore them so
    // this test cannot pollute other tests in the same JVM.
    System.clearProperty(validKey);
    System.clearProperty(invalidKey);
  }
}
}
10 changes: 9 additions & 1 deletion hadoop-project/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,8 @@
<yarnpkg.version>v1.22.5</yarnpkg.version>
<apache-ant.version>1.10.13</apache-ant.version>
<jmh.version>1.20</jmh.version>
<system-rules.version>1.18.0</system-rules.version>
<system-lambda.version>1.2.1</system-lambda.version>
</properties>

<dependencyManagement>
Expand Down Expand Up @@ -1212,7 +1214,7 @@
<dependency>
<groupId>com.github.stefanbirkner</groupId>
<artifactId>system-rules</artifactId>
<version>1.18.0</version>
<version>${system-rules.version}</version>
<exclusions>
<exclusion>
<groupId>junit</groupId>
Expand All @@ -1224,6 +1226,12 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.github.stefanbirkner</groupId>
<artifactId>system-lambda</artifactId>
<version>${system-lambda.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-collections4</artifactId>
Expand Down
Loading