Subsumption.java
/*
* Copyright © 2014 - 2021 Leipzig University (Database Research Group)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.gradoop.temporal.model.impl.operators.matching.common.query.postprocessing.transformation;
import org.gradoop.flink.model.impl.operators.matching.common.query.predicates.CNF;
import org.gradoop.flink.model.impl.operators.matching.common.query.predicates.CNFElement;
import org.gradoop.flink.model.impl.operators.matching.common.query.predicates.expressions.ComparisonExpression;
import org.gradoop.temporal.model.impl.operators.matching.common.query.postprocessing.QueryTransformation;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.stream.Collectors;
/**
* Abstract base class for subsumptions of clauses.
* A simplifying assumption is made: smaller clauses are not subsumed by larger ones.
* It is not guaranteed that all subsumptions or the optimal sequence of subsumptions are applied
*/
public abstract class Subsumption implements QueryTransformation {
@Override
public CNF transformCNF(CNF cnf) {
// separate singleton clauses from disjunctive clauses
ArrayList<CNFElement> necessaryClauses = cnf.getPredicates().stream()
.filter(clause -> clause.size() == 1).collect(Collectors.toCollection(ArrayList::new));
List<CNFElement> disjunctiveClauses = cnf.getPredicates().stream()
.filter(clause -> clause.size() > 1)
.collect(Collectors.toList());
// apply subsumption within disjunctive clauses while keeping the singletons and ...
cnf = new CNF(necessaryClauses).and(
subsumeDisjunctiveClauses(disjunctiveClauses));
List<CNFElement> oldClauses = cnf.getPredicates();
// ... apply subsumption to the result
return new CNF(subsumeClauses(oldClauses));
}
/**
* Sort clauses: clauses that subsume most other clauses are sorted at the beginning of
* the list
*
* @param clauses clauses to sort
* @return sorted clauses
*/
protected ArrayList<CNFElement> sortClauses(List<CNFElement> clauses) {
// count for every clause how much clauses it subsumes
HashMap<CNFElement, Integer> countSubsumes = new HashMap<>();
for (int i = 0; i < clauses.size(); i++) {
CNFElement clause1 = clauses.get(i);
countSubsumes.putIfAbsent(clause1, 0);
for (int j = i; j < clauses.size(); j++) {
CNFElement clause2 = clauses.get(j);
if (subsumes(clause1, clause2)) {
countSubsumes.put(clause1, countSubsumes.get(clause1) + 1);
}
if (subsumes(clause2, clause1)) {
countSubsumes.putIfAbsent(clause2, 0);
countSubsumes.put(clause2, countSubsumes.get(clause2) + 1);
}
}
}
// sort by these counts. If two clauses subsume the same number of clauses,
// the shorter clause is sorted first
ArrayList<CNFElement> clausesSorted = new ArrayList<>(clauses);
clausesSorted.sort(new Comparator<CNFElement>() {
@Override
public int compare(CNFElement t1, CNFElement t2) {
int subsumedDifference = countSubsumes.get(t2) -
countSubsumes.get(t1);
return subsumedDifference != 0 ? subsumedDifference :
t1.size() - t2.size();
}
});
return clausesSorted;
}
/**
* Performs the subsumption of a set of clauses
*
* @param clauses clauses for subsumption
* @return sublist of the clauses after subsumption
*/
protected ArrayList<CNFElement> subsumeClauses(List<CNFElement> clauses) {
ArrayList<CNFElement> oldClauses = sortClauses(clauses);
// points to clauses that are already subsumed (their index in the clauses list)
HashSet<Integer> subsumed = new HashSet<>();
// check for every clause if it subsumes other clauses
for (int i = 0; i < oldClauses.size(); i++) {
// clause c_i is already subsumed, i.e. actually not there anymore
if (subsumed.contains(i)) {
continue;
}
// clause c_i is not subsumed yet, i.e. may subsume other clauses
CNFElement c1 = oldClauses.get(i);
for (int j = i + 1; j < oldClauses.size(); j++) {
// clause c_j is already subsumed, i.e. actually not there anymore
if (subsumed.contains(j)) {
continue;
}
// clause c_j is not subsumed yet, i.e. may be subsumed
CNFElement c2 = oldClauses.get(j);
if (subsumes(c1, c2)) {
subsumed.add(j);
}
}
}
// only clauses that are not subsumed by other clauses are contained in the new CNF
ArrayList<CNFElement> newClauses = new ArrayList<>();
for (int i = 0; i < oldClauses.size(); i++) {
if (subsumed.contains(i)) {
continue;
}
newClauses.add(oldClauses.get(i));
}
return newClauses;
}
/**
* Applies {@link this::subsumeDisjunctiveClause} to all disjunctive clauses
* (CNF clauses of size > 1)
*
* @param disjClauses clauses
* @return CNF conjoining the subsumed clauses
*/
protected CNF subsumeDisjunctiveClauses(List<CNFElement> disjClauses) {
List<CNFElement> newClauses = disjClauses.stream()
.map(this::subsumeDisjunctiveClause)
.collect(Collectors.toList());
return new CNF(newClauses);
}
/**
* Applies subsumption to a disjunctive clause (might simplify the clause)
*
* @param clause clause to apply subsumption to.
* @return subsumed clause
*/
protected CNFElement subsumeDisjunctiveClause(CNFElement clause) {
return new CNFElement(subsumeClauses(
clause.getPredicates().stream()
.map(comparison -> new CNFElement(Collections.singletonList(comparison)))
.collect(Collectors.toList()))
.stream()
.map(c -> c.getPredicates().get(0))
.collect(Collectors.toList()));
}
/**
* Checks for two disjunctive clauses c1 and c2 whether c1 subsumes c2.
* Here, c1 subsumes c2 iff c1's comparisons form a subset of c2's comparisons
*
* @param c1 first clause
* @param c2 second clause
* @return true iff c1 subsumes c2
*/
protected boolean subsumes(CNFElement c1, CNFElement c2) {
for (ComparisonExpression comp1 : c1) {
boolean foundMatch = false;
for (ComparisonExpression comp2 : c2) {
if (subsumes(comp1, comp2)) {
foundMatch = true;
break;
}
}
if (!foundMatch) {
return false;
}
}
return true;
}
/**
* Checks whether a ComparisonExpression c1 subsumes c2
*
* @param c1 potentially subsuming comparison
* @param c2 potentially subsumed comparison
* @return true iff c1 subsumes c2
*/
public abstract boolean subsumes(ComparisonExpression c1,
ComparisonExpression c2);
}