Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
sudo: required
dist: trusty
language: java
script: mvn test
script:
- mvn clean -Pspark-2.1 test
- mvn clean -Pspark-2.2 test
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@

OAP - Optimized Analytics Package (previously known as Spinach) is designed to accelerate Ad-hoc query. OAP defines a new parquet-like columnar storage data format and offers a fine-grained hierarchical cache mechanism in the unit of “Fiber” in memory. What’s more, OAP has extended the Spark SQL DDL to allow users to define customized indices based on the relation.
## Building
By default, it builds for Spark 2.1.0. To specify the Spark version, please use profile spark-2.1, spark-2.2 or spark-2.3.
```
mvn -DskipTests package
mvn -DskipTests -Pspark-2.2 package
```
## Prerequisites
You should have [Apache Spark](http://spark.apache.org/) of version 2.1.0 installed in your cluster. Refer to Apache Spark's [documents](http://spark.apache.org/docs/2.1.0/) for details.
You should have [Apache Spark](http://spark.apache.org/) of version 2.1.0 or 2.2.0 installed in your cluster. Refer to Apache Spark's [documents](http://spark.apache.org/docs/2.1.0/) for details.
## Use OAP with Spark
1. Build OAP, `mvn -DskipTests package` and find `oap-<version>.jar` in `target/`
2. Deploy `oap-<version>.jar` to master machine.
Expand Down
100 changes: 96 additions & 4 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
<maven.compiler.useIncrementalCompilation>false</maven.compiler.useIncrementalCompilation>
<basedir>./</basedir>
<jetty.version>9.2.16.v20160414</jetty.version>
<elasticsearch.version>5.6.4</elasticsearch.version>
</properties>

<repositories>
Expand Down Expand Up @@ -306,6 +307,34 @@
<version>2.13.0</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId>
<version>${elasticsearch.version}</version>
</dependency>

<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>transport</artifactId>
<version>${elasticsearch.version}</version>
<exclusions>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
<exclusion>
<groupId>org.elasticsearch.plugin</groupId>
<artifactId>transport-netty3-client</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>org.elasticsearch.plugin</groupId>
<artifactId>transport-netty4-client</artifactId>
<version>${elasticsearch.version}</version>
</dependency>
</dependencies>

<build>
Expand Down Expand Up @@ -446,9 +475,9 @@
</jvmArgs>
<javacArgs>
<javacArg>-source</javacArg>
<javacArg>1.7</javacArg>
<javacArg>${java.version}</javacArg>
<javacArg>-target</javacArg>
<javacArg>1.7</javacArg>
<javacArg>${java.version}</javacArg>
<javacArg>-Xlint:all,-serial,-path</javacArg>
</javacArgs>
</configuration>
Expand Down Expand Up @@ -498,8 +527,8 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>3.3</version>
<configuration>
<source>1.7</source>
<target>1.7</target>
<source>${java.version}</source>
<target>${java.version}</target>
<encoding>UTF-8</encoding>
<maxmem>1024m</maxmem>
<fork>true</fork>
Expand Down Expand Up @@ -570,6 +599,7 @@
<spark.internal.version>2.1.0</spark.internal.version>
<scala.version>2.11.8</scala.version>
<scala.binary.version>2.11</scala.binary.version>
<java.version>1.7</java.version>
</properties>
<build>
<plugins>
Expand All @@ -595,6 +625,7 @@
<configuration>
<excludes>
<exclude>src/main/spark2.2</exclude>
<exclude>src/main/spark2.3</exclude>
</excludes>
</configuration>
</plugin>
Expand Down Expand Up @@ -627,6 +658,7 @@
<spark.internal.version>2.2.0</spark.internal.version>
<scala.version>2.11.8</scala.version>
<scala.binary.version>2.11</scala.binary.version>
<java.version>1.8</java.version>
</properties>
<build>
<plugins>
Expand All @@ -652,6 +684,7 @@
<configuration>
<excludes>
<exclude>src/main/spark2.1</exclude>
<exclude>src/main/spark2.3</exclude>
</excludes>
</configuration>
</plugin>
Expand All @@ -677,6 +710,65 @@
</plugins>
</build>
</profile>
<profile>
<id>spark-2.3</id>
<properties>
<spark.version>2.3.0</spark.version>
<spark.internal.version>2.3.0</spark.internal.version>
<scala.version>2.11.8</scala.version>
<scala.binary.version>2.11</scala.binary.version>
<java.version>1.8</java.version>
</properties>
<build>
<plugins>
<plugin>
<groupId>org.antlr</groupId>
<artifactId>antlr4-maven-plugin</artifactId>
<version>4.5.3</version>
<executions>
<execution>
<goals>
<goal>antlr4</goal>
</goals>
</execution>
</executions>
<configuration>
<visitor>true</visitor>
<sourceDirectory>src/main/spark2.3/antlr4</sourceDirectory>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<excludes>
<exclude>src/main/spark2.1</exclude>
<exclude>src/main/spark2.2</exclude>
</excludes>
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<version>3.0.0</version>
<executions>
<execution>
<id>add-source</id>
<phase>generate-sources</phase>
<goals>
<goal>add-source</goal>
</goals>
<configuration>
<sources>
<source>src/main/spark2.3</source>
</sources>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
</profiles>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.execution.datasources.parquet;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add Spark LICENSE?


import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.TimeUnit;

import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.network.NetworkModule;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.reindex.ReindexPlugin;
import org.elasticsearch.join.ParentJoinPlugin;
import org.elasticsearch.percolator.PercolatorPlugin;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.script.mustache.MustachePlugin;
import org.elasticsearch.transport.Netty4Plugin;

import io.netty.util.ThreadDeathWatcher;
import io.netty.util.concurrent.GlobalEventExecutor;

/**
 * A {@link TransportClient} pre-configured with the plugins OAP needs to talk to an
 * Elasticsearch cluster over the Netty4 transport: reindex, percolator, mustache
 * scripting and parent-join support are installed up front.
 *
 * <p>Modeled on Elasticsearch's {@code PreBuiltTransportClient}, but without the
 * Netty3 transport client, which OAP excludes from its dependencies.
 */
public class OapTransportClient extends TransportClient {

    /**
     * Netty wants to do some unwelcome things like use unsafe and replace a private field, or use
     * a poorly considered buffer recycler. This method disables these things by default, but can
     * be overridden by setting the corresponding system properties.
     */
    private static void initializeNetty() {
        /*
         * We disable three pieces of Netty functionality here:
         *   - we disable Netty from being unsafe
         *   - we disable Netty from replacing the selector key set
         *   - we disable Netty from using the recycler
         *
         * While permissions are needed to read and set these, the permissions needed here are
         * innocuous and thus should simply be granted rather than us handling a security
         * exception here.
         */
        setSystemPropertyIfUnset("io.netty.noUnsafe", Boolean.toString(true));
        setSystemPropertyIfUnset("io.netty.noKeySetOptimization", Boolean.toString(true));
        setSystemPropertyIfUnset("io.netty.recycler.maxCapacityPerThread", Integer.toString(0));
    }

    /** Sets the system property {@code key} to {@code value} unless the user already set it. */
    private static void setSystemPropertyIfUnset(final String key, final String value) {
        final String currentValue = System.getProperty(key);
        if (currentValue == null) {
            System.setProperty(key, value);
        }
    }

    // NOTE(review): the unused private constant OPTIONAL_DEPENDENCIES (a copy-over from
    // Elasticsearch's PreBuiltTransportClient listing the Netty3/Netty4 plugin class names)
    // was dead code here — it was never read — and has been removed.

    /** Plugins installed into every client instance; populated once in the static initializer. */
    private static final Collection<Class<? extends Plugin>> PRE_INSTALLED_PLUGINS;

    static {
        // Netty's system properties must be fixed before any Netty class is initialized,
        // so do it before the plugin classes below are loaded.
        initializeNetty();

        List<Class<? extends Plugin>> plugins = new ArrayList<>();

        plugins.add(Netty4Plugin.class);
        plugins.add(ReindexPlugin.class);
        plugins.add(PercolatorPlugin.class);
        plugins.add(MustachePlugin.class);
        plugins.add(ParentJoinPlugin.class);

        PRE_INSTALLED_PLUGINS = Collections.unmodifiableList(plugins);
    }

    /**
     * Creates a transport client with the given settings and the pre-installed plugin set.
     *
     * @param settings client settings; if a transport type is set it is expected to be the
     *                 Netty4 transport (see {@link #close()})
     */
    public OapTransportClient(Settings settings) {
        super(settings, PRE_INSTALLED_PLUGINS);
    }

    /**
     * Closes the client and, when the Netty4 transport is in use (explicitly configured or
     * used by default), waits up to five seconds each for Netty's global event executor and
     * thread-death watcher to quiesce so their threads do not outlive the client.
     */
    @Override
    public void close() {
        super.close();
        // Netty4 is in effect either when no transport type was configured (it is the
        // default) or when it was configured explicitly.
        final boolean usingNetty4Transport =
            !NetworkModule.TRANSPORT_TYPE_SETTING.exists(settings)
                || NetworkModule.TRANSPORT_TYPE_SETTING.get(settings)
                    .equals(Netty4Plugin.NETTY_TRANSPORT_NAME);
        if (usingNetty4Transport) {
            try {
                GlobalEventExecutor.INSTANCE.awaitInactivity(5, TimeUnit.SECONDS);
            } catch (InterruptedException e) {
                // Preserve the interrupt for callers; do not abort the remaining shutdown wait.
                Thread.currentThread().interrupt();
            }
            try {
                ThreadDeathWatcher.awaitInactivity(5, TimeUnit.SECONDS);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,12 @@ private[oap] case class BitMapIndex(entries: Seq[Int] = Nil) extends IndexType {
override def toString: String = "COLUMN(" + entries.mkString(", ") + ") BITMAP"
}

/**
 * Index metadata for an Elasticsearch-backed ("ES") index over the columns whose
 * ordinals are listed in `entries`.
 */
private[oap] case class ESIndex(entries: Seq[Int] = Nil) extends IndexType {
  /** Returns a new ESIndex with `entry` appended to the column-ordinal list. */
  def appendEntry(entry: Int): ESIndex = copy(entries = entries :+ entry)

  override def toString: String = s"COLUMN(${entries.mkString(", ")}) ES"
}

private[oap] case class HashIndex(entries: Seq[Int] = Nil) extends IndexType {
def appendEntry(entry: Int): HashIndex = HashIndex(entries :+ entry)

Expand Down Expand Up @@ -237,6 +243,10 @@ private[oap] class IndexMeta(
out.writeByte(HASH_INDEX_TYPE)
entries.foreach(keyBits += _)
writeBitSet(keyBits, INDEX_META_KEY_LENGTH, out)
case ESIndex(entries) =>
out.writeByte(ES_INDEX_TYPE)
entries.foreach(keyBits += _)
writeBitSet(keyBits, INDEX_META_KEY_LENGTH, out)
}
}

Expand Down Expand Up @@ -268,6 +278,7 @@ private[oap] class IndexMeta(
flag match {
case BITMAP_INDEX_TYPE => BitMapIndex(keyBits.toSeq)
case HASH_INDEX_TYPE => HashIndex(keyBits.toSeq)
case ES_INDEX_TYPE => ESIndex(keyBits.toSeq)
}
}
}
Expand All @@ -277,6 +288,7 @@ private[oap] object IndexMeta {
final val BTREE_INDEX_TYPE = 0
final val BITMAP_INDEX_TYPE = 1
final val HASH_INDEX_TYPE = 2
final val ES_INDEX_TYPE = 3

def apply(): IndexMeta = new IndexMeta()
def apply(name: String, time: String, indexType: IndexType): IndexMeta = {
Expand Down Expand Up @@ -356,6 +368,9 @@ private[oap] case class DataSourceMeta(
case index @ BitMapIndex(entries) =>
entries.map(ordinal =>
schema(ordinal).name).contains(attr) && index.satisfy(requirement)
case index @ ESIndex(entries) =>
entries.map(ordinal =>
schema(ordinal).name).contains(attr) && index.satisfy(requirement)
case _ => false
}
}
Expand Down
Loading