GreedyPlanner.java
/*
* Copyright © 2014 - 2021 Leipzig University (Database Research Group)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.gradoop.flink.model.impl.operators.matching.single.cypher.planning.planner.greedy;
import com.google.common.collect.Sets;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.flink.api.java.DataSet;
import org.gradoop.common.model.api.entities.GraphHead;
import org.gradoop.common.util.GradoopConstants;
import org.gradoop.flink.model.api.epgm.BaseGraph;
import org.gradoop.flink.model.api.epgm.BaseGraphCollection;
import org.gradoop.flink.model.impl.operators.matching.common.MatchStrategy;
import org.gradoop.flink.model.impl.operators.matching.common.query.QueryHandler;
import org.gradoop.flink.model.impl.operators.matching.common.query.predicates.CNF;
import org.gradoop.flink.model.impl.operators.matching.common.query.predicates.CNFElement;
import org.gradoop.flink.model.impl.operators.matching.common.query.predicates.QueryComparable;
import org.gradoop.flink.model.impl.operators.matching.common.query.predicates.comparables.PropertySelectorComparable;
import org.gradoop.flink.model.impl.operators.matching.common.query.predicates.expressions.ComparisonExpression;
import org.gradoop.flink.model.impl.operators.matching.common.statistics.GraphStatistics;
import org.gradoop.flink.model.impl.operators.matching.single.cypher.utils.ExpandDirection;
import org.gradoop.flink.model.impl.operators.matching.single.cypher.planning.estimation.QueryPlanEstimator;
import org.gradoop.flink.model.impl.operators.matching.single.cypher.planning.plantable.PlanTable;
import org.gradoop.flink.model.impl.operators.matching.single.cypher.planning.plantable.PlanTableEntry;
import org.gradoop.flink.model.impl.operators.matching.single.cypher.planning.queryplan.PlanNode;
import org.gradoop.flink.model.impl.operators.matching.single.cypher.planning.queryplan.QueryPlan;
import org.gradoop.flink.model.impl.operators.matching.single.cypher.planning.queryplan.binary.CartesianProductNode;
import org.gradoop.flink.model.impl.operators.matching.single.cypher.planning.queryplan.binary.ExpandEmbeddingsNode;
import org.gradoop.flink.model.impl.operators.matching.single.cypher.planning.queryplan.binary.JoinEmbeddingsNode;
import org.gradoop.flink.model.impl.operators.matching.single.cypher.planning.queryplan.binary.ValueJoinNode;
import org.gradoop.flink.model.impl.operators.matching.single.cypher.planning.queryplan.leaf.FilterAndProjectEdgesNode;
import org.gradoop.flink.model.impl.operators.matching.single.cypher.planning.queryplan.leaf.FilterAndProjectVerticesNode;
import org.gradoop.flink.model.impl.operators.matching.single.cypher.planning.queryplan.unary.FilterEmbeddingsNode;
import org.gradoop.flink.model.impl.operators.matching.single.cypher.planning.queryplan.unary.ProjectEmbeddingsNode;
import org.gradoop.gdl.model.Edge;
import org.gradoop.gdl.model.Vertex;
import org.gradoop.gdl.utils.Comparator;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import static org.gradoop.flink.model.impl.operators.matching.single.cypher.planning.plantable.PlanTableEntry.Type.EDGE;
import static org.gradoop.flink.model.impl.operators.matching.single.cypher.planning.plantable.PlanTableEntry.Type.GRAPH;
import static org.gradoop.flink.model.impl.operators.matching.single.cypher.planning.plantable.PlanTableEntry.Type.PATH;
import static org.gradoop.flink.model.impl.operators.matching.single.cypher.planning.plantable.PlanTableEntry.Type.VERTEX;
/**
* A greedy query planner that builds a query plan by iteratively picking the cheapest partial query
* plan and extending it.
*
* @param <G> The graph head type.
* @param <V> The vertex type.
* @param <E> The edge type.
* @param <LG> The graph type.
* @param <GC> The graph collection type.
*/
public class GreedyPlanner<
G extends GraphHead,
V extends org.gradoop.common.model.api.entities.Vertex,
E extends org.gradoop.common.model.api.entities.Edge,
LG extends BaseGraph<G, V, E, LG, GC>,
GC extends BaseGraphCollection<G, V, E, LG, GC>> {
/**
* The search graph to be queried
*/
private final LG graph;
/**
* The query handler represents the query.
*/
private final QueryHandler queryHandler;
/**
* Statistics about the search graph.
*/
private final GraphStatistics graphStatistics;
/**
* The morphism type for vertex mappings.
*/
private final MatchStrategy vertexStrategy;
/**
* The morphism type for edge mappings.
*/
private final MatchStrategy edgeStrategy;
/**
* Creates a new greedy planner.
*
* @param graph search graph
* @param queryHandler query handler
* @param graphStatistics search graph statistics
* @param vertexStrategy morphism type for vertex mappings
* @param edgeStrategy morphism type for edge mappings
*/
public GreedyPlanner(LG graph, QueryHandler queryHandler,
GraphStatistics graphStatistics, MatchStrategy vertexStrategy, MatchStrategy edgeStrategy) {
this.graph = graph;
this.queryHandler = queryHandler;
this.graphStatistics = graphStatistics;
this.vertexStrategy = vertexStrategy;
this.edgeStrategy = edgeStrategy;
}
/**
* Computes the {@link PlanTableEntry} that wraps the {@link QueryPlan} with the minimum costs
* according to the greedy optimization algorithm.
*
* @return entry with minimum execution costs
*/
public PlanTableEntry plan() {
PlanTable planTable = initPlanTable();
while (planTable.size() > 1) {
PlanTable newPlans = evaluateJoins(planTable);
if (newPlans.size() == 0) {
// No new plans where generated by joining but there are still multiple disconnected
// query graph components. In this case we need to do a cartesian product.
newPlans = evaluateCartesianProducts(planTable);
}
newPlans = evaluateFilter(newPlans);
newPlans = evaluateProjection(newPlans);
// get plan with minimum costs and remove all plans covered by this plan
PlanTableEntry bestEntry = newPlans.min();
planTable.removeCoveredBy(bestEntry);
planTable.add(bestEntry);
}
return planTable.get(0);
}
//------------------------------------------------------------------------------------------------
// Initialization
//------------------------------------------------------------------------------------------------
/**
* Creates the initial plan table entries according to the specified vertices and edges.
*
* @return initial plan table
*/
private PlanTable initPlanTable() {
PlanTable planTable = new PlanTable();
createVertexPlans(planTable);
createEdgePlans(planTable);
return planTable;
}
//------------------------------------------------------------------------------------------------
// Leaf nodes (i.e. vertices and (variable length) edges)
//------------------------------------------------------------------------------------------------
/**
* Creates an initial {@link PlanTableEntry} for each vertex in the query graph and adds it to the
* specified {@link PlanTable}. The entry wraps a query plan that filters vertices based on their
* predicates and projects properties that are required for further query planning.
*
* @param planTable plan table
*/
private void createVertexPlans(PlanTable planTable) {
for (Vertex vertex : queryHandler.getVertices()) {
String vertexVariable = vertex.getVariable();
CNF allPredicates = queryHandler.getPredicates();
// TODO: this might be moved to the FilterAndProject node in issue #510
CNF vertexPredicates = allPredicates.removeSubCNF(vertexVariable);
Set<String> projectionKeys = allPredicates.getPropertyKeys(vertexVariable);
DataSet<V> vertices =
vertex.getLabel().equals(GradoopConstants.DEFAULT_VERTEX_LABEL) ?
graph.getVertices() : graph.getVerticesByLabel(vertex.getLabel());
FilterAndProjectVerticesNode<V> node = new FilterAndProjectVerticesNode<>(vertices,
vertex.getVariable(), vertexPredicates, projectionKeys);
planTable.add(new PlanTableEntry(VERTEX, Sets.newHashSet(vertexVariable), allPredicates,
new QueryPlanEstimator(new QueryPlan(node), queryHandler, graphStatistics)));
}
}
/**
* Creates an initial {@link PlanTableEntry} for each edge in the query graph and adds it to the
* specified {@link PlanTable}. The entry wraps a {@link QueryPlan} that filters edges based on
* their predicates and projects properties that are required for further query planning.
*
* @param planTable plan table
*/
private void createEdgePlans(PlanTable planTable) {
for (Edge edge : queryHandler.getEdges()) {
String edgeVariable = edge.getVariable();
String sourceVariable = queryHandler.getVertexById(edge.getSourceVertexId()).getVariable();
String targetVariable = queryHandler.getVertexById(edge.getTargetVertexId()).getVariable();
CNF allPredicates = queryHandler.getPredicates();
// TODO: this might be moved the the FilterAndProject node in issue #510
CNF edgePredicates = allPredicates.removeSubCNF(edgeVariable);
Set<String> projectionKeys = allPredicates.getPropertyKeys(edgeVariable);
boolean isPath = edge.getUpperBound() != 1;
DataSet<E> edges =
edge.getLabel().equals(GradoopConstants.DEFAULT_EDGE_LABEL) ?
graph.getEdges() : graph.getEdgesByLabel(edge.getLabel());
FilterAndProjectEdgesNode<E> node = new FilterAndProjectEdgesNode<>(edges,
sourceVariable, edgeVariable, targetVariable, edgePredicates, projectionKeys, isPath);
PlanTableEntry.Type type = edge.hasVariableLength() ? PATH : EDGE;
planTable.add(new PlanTableEntry(type, Sets.newHashSet(edgeVariable), allPredicates,
new QueryPlanEstimator(new QueryPlan(node), queryHandler, graphStatistics)));
}
}
//------------------------------------------------------------------------------------------------
// Join and Expand
//------------------------------------------------------------------------------------------------
/**
* Evaluates which entries in the specified plan table can be joined. The joined entries
* are added to a new table which is returned.
*
* @param currentTable query plan table
* @return new table containing solely joined plans from the input table
*/
private PlanTable evaluateJoins(PlanTable currentTable) {
PlanTable newTable = new PlanTable();
for (int i = 0; i < currentTable.size(); i++) {
PlanTableEntry leftEntry = currentTable.get(i);
if (mayExtend(leftEntry)) {
for (int j = 0; j < currentTable.size(); j++) {
PlanTableEntry rightEntry = currentTable.get(j);
if (i != j) {
List<String> joinVariables = getOverlap(leftEntry, rightEntry);
if (joinVariables.size() > 0) {
if (rightEntry.getType() == PATH && joinVariables.size() == 2) {
// evaluate join with variable length path on source and target vertex
newTable.add(joinEntries(leftEntry, rightEntry, joinVariables.subList(0, 1)));
newTable.add(joinEntries(leftEntry, rightEntry, joinVariables.subList(1, 2)));
} else {
// regular join or join with variable length path on source or target vertex
newTable.add(joinEntries(leftEntry, rightEntry, joinVariables));
}
}
}
}
}
}
return newTable;
}
/**
* Checks if the given entry may be extended. This is only the case for entries that represents
* either a vertex or a partial match graph.
*
* @param entry plan table entry
* @return true, iff the specified entry may be extended
*/
private boolean mayExtend(PlanTableEntry entry) {
return entry.getType() == VERTEX || entry.getType() == GRAPH;
}
/**
* Computes the overlapping query variables of the specified entries.
*
* @param firstEntry first entry
* @param secondEntry second entry
* @return variables that are available in both input entries
*/
private List<String> getOverlap(PlanTableEntry firstEntry, PlanTableEntry secondEntry) {
Set<String> overlap = firstEntry.getAllVariables();
overlap.retainAll(secondEntry.getAllVariables());
return new ArrayList<>(overlap);
}
/**
* Joins the query plans represented by the specified plan table entries.
*
* The method considers if the right entry is a variable length path and in that case
* creates an {@link ExpandEmbeddingsNode}. In any other case, a regular
* {@link JoinEmbeddingsNode} is used to join the query plans.
*
* @param leftEntry left entry
* @param rightEntry right entry
* @param joinVariables join variables
* @return an entry that represents the join of both input entries
*/
private PlanTableEntry joinEntries(PlanTableEntry leftEntry, PlanTableEntry rightEntry,
List<String> joinVariables) {
PlanNode node;
if (rightEntry.getType() == PATH) {
assert joinVariables.size() == 1;
node = createExpandNode(leftEntry, rightEntry, joinVariables.get(0));
} else {
node = new JoinEmbeddingsNode(leftEntry.getQueryPlan().getRoot(),
rightEntry.getQueryPlan().getRoot(), joinVariables, vertexStrategy, edgeStrategy);
}
// update processed variables
HashSet<String> processedVariables = Sets.newHashSet(leftEntry.getProcessedVariables());
processedVariables.addAll(rightEntry.getProcessedVariables());
// create resulting predicates
// TODO: this might be moved to the join/expand node in issue #510
CNF predicates = mergePredicates(leftEntry, rightEntry);
return new PlanTableEntry(GRAPH, processedVariables, predicates,
new QueryPlanEstimator(new QueryPlan(node), queryHandler, graphStatistics));
}
/**
* Creates an {@link ExpandEmbeddingsNode} from the specified arguments.
*
* @param leftEntry left entry
* @param rightEntry right entry
* @param startVariable vertex variable to expand from
*
* @return new expand node
*/
private ExpandEmbeddingsNode createExpandNode(PlanTableEntry leftEntry, PlanTableEntry rightEntry,
String startVariable) {
String pathVariable = rightEntry.getQueryPlan().getRoot()
.getEmbeddingMetaData().getEdgeVariables().get(0);
Edge queryEdge = queryHandler.getEdgeByVariable(pathVariable);
Vertex sourceVertex = queryHandler.getVertexById(queryEdge.getSourceVertexId());
Vertex targetVertex = queryHandler.getVertexById(queryEdge.getTargetVertexId());
int lowerBound = queryEdge.getLowerBound();
int upperBound = queryEdge.getUpperBound();
ExpandDirection direction = sourceVertex.getVariable().equals(startVariable) ?
ExpandDirection.OUT : ExpandDirection.IN;
String endVariable = direction == ExpandDirection.OUT ?
targetVertex.getVariable() : sourceVertex.getVariable();
return new ExpandEmbeddingsNode(leftEntry.getQueryPlan().getRoot(),
rightEntry.getQueryPlan().getRoot(),
startVariable, pathVariable, endVariable, lowerBound, upperBound, direction,
vertexStrategy, edgeStrategy);
}
//------------------------------------------------------------------------------------------------
// Filter embedding evaluation
//------------------------------------------------------------------------------------------------
/**
* The method checks if a filter can be applied on any of the entries in the specified table. If
* this is the case, a {@link FilterEmbeddingsNode} is added to the query plan represented by the
* affected entries.
*
* @param currentTable query plan table
* @return input table with possibly updated entries
*/
private PlanTable evaluateFilter(PlanTable currentTable) {
PlanTable newTable = new PlanTable();
for (PlanTableEntry entry : currentTable) {
Set<String> variables = Sets.newHashSet(entry.getProcessedVariables());
CNF predicates = entry.getPredicates();
CNF subCNF = predicates.removeSubCNF(variables);
if (subCNF.size() > 0) {
FilterEmbeddingsNode node = new FilterEmbeddingsNode(entry.getQueryPlan().getRoot(),
subCNF);
newTable.add(new PlanTableEntry(GRAPH, Sets.newHashSet(entry.getProcessedVariables()),
predicates, new QueryPlanEstimator(new QueryPlan(node), queryHandler, graphStatistics)));
} else {
newTable.add(entry);
}
}
return newTable;
}
//------------------------------------------------------------------------------------------------
// Filter embedding evaluation
//------------------------------------------------------------------------------------------------
/**
* The method checks if a filter can be applied on any of the entries in the specified table. If
* this is the case, a {@link ProjectEmbeddingsNode} is added to the query plan represented by the
* affected entries.
*
* @param currentTable query plan table
* @return input table with possibly updated entries
*/
private PlanTable evaluateProjection(PlanTable currentTable) {
PlanTable newTable = new PlanTable();
for (PlanTableEntry entry : currentTable) {
Set<Pair<String, String>> propertyPairs = entry.getPropertyPairs();
Set<Pair<String, String>> projectionPairs = entry.getProjectionPairs();
Set<Pair<String, String>> updatedPropertyPairs = propertyPairs.stream()
.filter(projectionPairs::contains)
.collect(Collectors.toSet());
if (updatedPropertyPairs.size() < propertyPairs.size()) {
ProjectEmbeddingsNode node = new ProjectEmbeddingsNode(entry.getQueryPlan().getRoot(),
new ArrayList<>(updatedPropertyPairs));
newTable.add(new PlanTableEntry(GRAPH,
Sets.newHashSet(entry.getProcessedVariables()), entry.getPredicates(),
new QueryPlanEstimator(new QueryPlan(node), queryHandler, graphStatistics)));
} else {
newTable.add(entry);
}
}
return newTable;
}
//------------------------------------------------------------------------------------------------
// Join and Expand
//------------------------------------------------------------------------------------------------
/**
* Evaluates which entries in the specified plan table can be joined. The joined entries
* are added to a new table which is returned.
*
* @param currentTable query plan table
* @return new table containing solely joined plans from the input table
*/
private PlanTable evaluateCartesianProducts(PlanTable currentTable) {
PlanTable newTable = new PlanTable();
for (int i = 0; i < currentTable.size(); i++) {
PlanTableEntry leftEntry = currentTable.get(i);
for (int j = i + 1; j < currentTable.size(); j++) {
PlanTableEntry rightEntry = currentTable.get(j);
CNF joinPredicate = getJoinPredicate(leftEntry, rightEntry);
if (joinPredicate.size() > 0) {
newTable.add(createValueJoinEntry(leftEntry, rightEntry, joinPredicate));
} else {
// regular join or join with variable length path on source or target vertex
newTable.add(createCartesianProductEntry(leftEntry, rightEntry));
}
}
}
return newTable;
}
/**
* Computes the overlapping query variables of the specified entries.
*
* @param leftEntry first entry
* @param rightEntry second entry
* @return variables that are available in both input entries
*/
private CNF getJoinPredicate(PlanTableEntry leftEntry, PlanTableEntry rightEntry) {
Set<String> allVariables = leftEntry.getAllVariables();
allVariables.addAll(rightEntry.getAllVariables());
CNF leftPredicates = new CNF(leftEntry.getPredicates());
CNF rightPredicates = new CNF(rightEntry.getPredicates());
leftPredicates.removeSubCNF(rightEntry.getProcessedVariables());
rightPredicates.removeSubCNF(leftEntry.getProcessedVariables());
CNF predicates = leftPredicates.and(rightPredicates).getSubCNF(allVariables);
return new CNF(
predicates.getPredicates()
.stream()
.filter(p ->
p.size() == 1 && p.getPredicates().get(0).getComparator().equals(Comparator.EQ)
).collect(Collectors.toList())
);
}
/**
* Creates an {@link CartesianProductNode} from the specified arguments.
*
* @param leftEntry left entry
* @param rightEntry right entry
*
* @return new expand node
*/
private PlanTableEntry createCartesianProductEntry(PlanTableEntry leftEntry,
PlanTableEntry rightEntry) {
CartesianProductNode node = new CartesianProductNode(
leftEntry.getQueryPlan().getRoot(),
rightEntry.getQueryPlan().getRoot(),
vertexStrategy, edgeStrategy
);
Set<String> processedVariables = leftEntry.getProcessedVariables();
processedVariables.addAll(rightEntry.getProcessedVariables());
CNF predicates = mergePredicates(leftEntry, rightEntry);
return new PlanTableEntry(
GRAPH,
processedVariables,
predicates,
new QueryPlanEstimator(new QueryPlan(node), queryHandler, graphStatistics)
);
}
/**
* Creates an {@link ValueJoinNode} from the specified arguments.
*
* @param leftEntry left entry
* @param rightEntry right entry
* @param joinPredicate join predicate
*
* @return new value join node
*/
private PlanTableEntry createValueJoinEntry(PlanTableEntry leftEntry,
PlanTableEntry rightEntry, CNF joinPredicate) {
List<Pair<String, String>> leftProperties = new ArrayList<>();
List<Pair<String, String>> rightProperties = new ArrayList<>();
for (CNFElement e : joinPredicate.getPredicates()) {
ComparisonExpression comparison = e.getPredicates().get(0);
Pair<String, String> joinProperty = extractJoinProperty(comparison.getLhs());
if (leftEntry.getAllVariables().contains(joinProperty.getKey())) {
leftProperties.add(joinProperty);
} else {
rightProperties.add(joinProperty);
}
joinProperty = extractJoinProperty(comparison.getRhs());
if (leftEntry.getAllVariables().contains(joinProperty.getKey())) {
leftProperties.add(joinProperty);
} else {
rightProperties.add(joinProperty);
}
}
ValueJoinNode node = new ValueJoinNode(
leftEntry.getQueryPlan().getRoot(),
rightEntry.getQueryPlan().getRoot(),
leftProperties, rightProperties,
vertexStrategy, edgeStrategy
);
Set<String> processedVariables = leftEntry.getProcessedVariables();
processedVariables.addAll(rightEntry.getProcessedVariables());
CNF predicates = mergePredicates(leftEntry, rightEntry);
return new PlanTableEntry(
GRAPH,
processedVariables,
predicates,
new QueryPlanEstimator(new QueryPlan(node), queryHandler, graphStatistics)
);
}
/**
* Turns a QueryComparable into a {@code Pair<Variable, PropertyKey>}
* @param comparable query comparable
* @return join property
*/
private Pair<String, String> extractJoinProperty(QueryComparable comparable) {
if (comparable instanceof PropertySelectorComparable) {
PropertySelectorComparable propertySelector = (PropertySelectorComparable) comparable;
return Pair.of(propertySelector.getVariable(), propertySelector.getPropertyKey());
} else {
//TODO #580 Include ElementSelector -> ID needs to be projected as property
throw new RuntimeException("Comparable " + comparable + "cant be used for ValueJoin");
}
}
/**
* Creates a new predicate that includes only elements that exist in both input predicates
*
* @param leftEntry left side plant table entry
* @param rightEntry right side plan table entry
* @return Merged predicates
*/
private CNF mergePredicates(PlanTableEntry leftEntry, PlanTableEntry rightEntry) {
CNF leftPredicates = new CNF(leftEntry.getPredicates());
CNF rightPredicates = new CNF(rightEntry.getPredicates());
leftPredicates.removeSubCNF(rightEntry.getProcessedVariables());
rightPredicates.removeSubCNF(leftEntry.getProcessedVariables());
return leftPredicates.and(rightPredicates);
}
}