// GradoopHBaseConfig.java
/*
* Copyright © 2014 - 2021 Leipzig University (Database Research Group)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.gradoop.storage.hbase.config;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hbase.TableName;
import org.gradoop.common.model.impl.pojo.EPGMEdgeFactory;
import org.gradoop.common.model.impl.pojo.EPGMGraphHeadFactory;
import org.gradoop.common.model.impl.pojo.EPGMVertexFactory;
import org.gradoop.storage.common.config.GradoopStoreConfig;
import org.gradoop.storage.hbase.impl.api.EdgeHandler;
import org.gradoop.storage.hbase.impl.api.GraphHeadHandler;
import org.gradoop.storage.hbase.impl.api.VertexHandler;
import org.gradoop.storage.hbase.impl.constants.HBaseConstants;
import org.gradoop.storage.hbase.impl.handler.HBaseEdgeHandler;
import org.gradoop.storage.hbase.impl.handler.HBaseGraphHeadHandler;
import org.gradoop.storage.hbase.impl.handler.HBaseVertexHandler;
import org.gradoop.storage.hbase.utils.RegionSplitter;
import org.gradoop.storage.hbase.utils.RowKeyDistributor;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
/**
 * Configuration class for using HBase with Gradoop.
 * <p>
 * A configuration bundles the names of the graph, vertex and edge tables with the handlers used
 * for graph heads, vertices and edges. Instances are obtained through
 * {@link #getDefaultConfig()} or
 * {@link #createConfig(GradoopHBaseConfig, String, String, String)}.
 * <p>
 * Note that {@link #enablePreSplitRegions(int)} and {@link #useSpreadingByte(int)} modify the
 * handler objects held by this configuration. A configuration derived via {@code createConfig}
 * holds the very same handler objects as its source, so such a change is visible through both.
 */
public class GradoopHBaseConfig implements GradoopStoreConfig {

  /**
   * Definition for serialize version control. Declared as {@code long}, which is the type the
   * Java serialization mechanism specifies for this field.
   */
  private static final long serialVersionUID = 23L;

  /**
   * Graph table name.
   */
  private final String graphTableName;

  /**
   * Vertex table name.
   */
  private final String vertexTableName;

  /**
   * Edge table name.
   */
  private final String edgeTableName;

  /**
   * Graph head handler.
   */
  private final GraphHeadHandler graphHeadHandler;

  /**
   * Vertex handler.
   */
  private final VertexHandler vertexHandler;

  /**
   * Edge handler.
   */
  private final EdgeHandler edgeHandler;

  /**
   * Creates a new Configuration.
   *
   * @param graphHeadHandler graph head handler
   * @param vertexHandler    vertex handler
   * @param edgeHandler      edge handler
   * @param graphTableName   graph table name
   * @param vertexTableName  vertex table name
   * @param edgeTableName    edge table name
   * @throws IllegalArgumentException if one of the table names is null or empty
   * @throws NullPointerException     if one of the handlers is null
   */
  private GradoopHBaseConfig(
    GraphHeadHandler graphHeadHandler,
    VertexHandler vertexHandler,
    EdgeHandler edgeHandler,
    String graphTableName,
    String vertexTableName,
    String edgeTableName
  ) {
    checkArgument(!StringUtils.isEmpty(graphTableName),
      "Graph table name was null or empty");
    checkArgument(!StringUtils.isEmpty(vertexTableName),
      "Vertex table name was null or empty");
    checkArgument(!StringUtils.isEmpty(edgeTableName),
      "Edge table name was null or empty");

    this.graphTableName = graphTableName;
    this.vertexTableName = vertexTableName;
    this.edgeTableName = edgeTableName;

    this.graphHeadHandler = checkNotNull(graphHeadHandler, "GraphHeadHandler was null");
    this.vertexHandler = checkNotNull(vertexHandler, "VertexHandler was null");
    this.edgeHandler = checkNotNull(edgeHandler, "EdgeHandler was null");
  }

  /**
   * Creates a new Configuration that reuses the handlers of an existing configuration but
   * uses the given table names.
   *
   * @param config          Gradoop configuration whose handlers are reused
   * @param graphTableName  graph table name
   * @param vertexTableName vertex table name
   * @param edgeTableName   edge table name
   * @throws IllegalArgumentException if one of the table names is null or empty
   * @throws NullPointerException     if {@code config} is null
   */
  private GradoopHBaseConfig(
    GradoopHBaseConfig config,
    String graphTableName,
    String vertexTableName,
    String edgeTableName
  ) {
    // Fail with a descriptive message instead of a bare NPE when the source config is missing.
    this(checkNotNull(config, "Gradoop HBase config was null").getGraphHeadHandler(),
      config.getVertexHandler(),
      config.getEdgeHandler(),
      graphTableName,
      vertexTableName,
      edgeTableName);
  }

  /**
   * Creates a default Configuration using POJO handlers for vertices, edges
   * and graph heads and default table names.
   *
   * @return Default Gradoop HBase configuration.
   */
  public static GradoopHBaseConfig getDefaultConfig() {
    return new GradoopHBaseConfig(
      new HBaseGraphHeadHandler(new EPGMGraphHeadFactory()),
      new HBaseVertexHandler(new EPGMVertexFactory()),
      new HBaseEdgeHandler(new EPGMEdgeFactory()),
      HBaseConstants.DEFAULT_TABLE_GRAPHS,
      HBaseConstants.DEFAULT_TABLE_VERTICES,
      HBaseConstants.DEFAULT_TABLE_EDGES
    );
  }

  /**
   * Creates a Gradoop HBase configuration based on the given arguments. The handlers are taken
   * from {@code gradoopConfig} as they are (they are not copied).
   *
   * @param gradoopConfig   Gradoop configuration
   * @param graphTableName  graph table name
   * @param vertexTableName vertex table name
   * @param edgeTableName   edge table name
   *
   * @return Gradoop HBase configuration
   * @throws IllegalArgumentException if one of the table names is null or empty
   * @throws NullPointerException     if {@code gradoopConfig} is null
   */
  public static GradoopHBaseConfig createConfig(
    GradoopHBaseConfig gradoopConfig,
    String graphTableName,
    String vertexTableName,
    String edgeTableName
  ) {
    return new GradoopHBaseConfig(gradoopConfig, graphTableName, vertexTableName, edgeTableName);
  }

  /**
   * Get vertex table name
   *
   * @return vertex table name
   */
  public TableName getVertexTableName() {
    return TableName.valueOf(vertexTableName);
  }

  /**
   * Get edge table name
   *
   * @return edge table name
   */
  public TableName getEdgeTableName() {
    return TableName.valueOf(edgeTableName);
  }

  /**
   * Get graph table name
   *
   * @return graph table name
   */
  public TableName getGraphTableName() {
    return TableName.valueOf(graphTableName);
  }

  /**
   * Get graph head handler
   *
   * @return graph head handler
   */
  public GraphHeadHandler getGraphHeadHandler() {
    return graphHeadHandler;
  }

  /**
   * Get vertex handler
   *
   * @return vertex handler
   */
  public VertexHandler getVertexHandler() {
    return vertexHandler;
  }

  /**
   * Get edge handler
   *
   * @return edge handler
   */
  public EdgeHandler getEdgeHandler() {
    return edgeHandler;
  }

  /**
   * Enables the usage of pre-split regions at the moment of table creation.
   * If the HBase table size grows, it should be created with pre-split regions in order to avoid
   * region hotspots. If certain region servers are stressed by very intensive write/read
   * operations, HBase may drop that region server because the Zookeeper connection will timeout.
   *
   * Note that this flag has no effect if the tables already exist.
   *
   * The number of regions is set on the instance returned by
   * {@code RegionSplitter.getInstance()}, not on this configuration object.
   *
   * @param numberOfRegions the number of regions used for splitting
   * @return this modified config
   */
  public GradoopHBaseConfig enablePreSplitRegions(final int numberOfRegions) {
    RegionSplitter.getInstance().setNumberOfRegions(numberOfRegions);

    this.graphHeadHandler.setPreSplitRegions(true);
    this.vertexHandler.setPreSplitRegions(true);
    this.edgeHandler.setPreSplitRegions(true);
    return this;
  }

  /**
   * Enables the usage of a spreading byte as prefix of each HBase row key. This affects
   * reading and writing from/to HBase.
   *
   * Records in HBase are sorted lexicographically by the row key. This allows fast access to
   * an individual record by its key and fast fetching of a range of data given start and stop keys.
   * But writing records with such naive keys will cause hotspotting because of how HBase writes
   * data to its Regions. Enabling this option is meant to avoid that hotspotting.
   *
   * The bucket count is set on the instance returned by
   * {@code RowKeyDistributor.getInstance()}, not on this configuration object.
   *
   * @param bucketCount the number of spreading bytes to use; must be representable as a
   *                    {@code byte}
   * @return this modified config
   * @throws IllegalArgumentException if {@code bucketCount} does not fit into a {@code byte}
   */
  public GradoopHBaseConfig useSpreadingByte(final int bucketCount) {
    // The distributor accepts a byte. Without this guard the narrowing cast below would silently
    // turn e.g. 256 into 0; any further validation is left to the distributor itself.
    checkArgument(bucketCount >= Byte.MIN_VALUE && bucketCount <= Byte.MAX_VALUE,
      "Bucket count %s does not fit into a byte", bucketCount);
    RowKeyDistributor.getInstance().setBucketCount((byte) bucketCount);

    this.graphHeadHandler.setSpreadingByteUsage(true);
    this.vertexHandler.setSpreadingByteUsage(true);
    this.edgeHandler.setSpreadingByteUsage(true);
    return this;
  }
}