// BinningTemporalGraphStatisticsFactory.java
/*
* Copyright © 2014 - 2021 Leipzig University (Database Research Group)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.gradoop.temporal.model.impl.operators.matching.common.statistics.binning;
import org.apache.commons.compress.utils.Lists;
import org.apache.flink.api.java.io.LocalCollectionOutputFormat;
import org.gradoop.common.model.impl.pojo.EPGMElement;
import org.gradoop.temporal.model.impl.TemporalGraph;
import org.gradoop.temporal.model.impl.operators.matching.common.statistics.TemporalGraphStatisticsFactory;
import org.gradoop.temporal.model.impl.operators.matching.common.statistics.binning.functions.ElementsToStats;
import org.gradoop.temporal.model.impl.operators.matching.common.statistics.binning.pojo.TemporalElementStats;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * Factory that builds {@link BinningTemporalGraphStatistics} from a {@link TemporalGraph}.
 * <p>
 * Vertices and edges are each grouped by label, every group is handed to
 * {@link ElementsToStats}, and the resulting {@link TemporalElementStats} are collected
 * into local lists that are passed to the statistics object.
 */
public class BinningTemporalGraphStatisticsFactory implements
  TemporalGraphStatisticsFactory<BinningTemporalGraphStatistics> {

  /**
   * Sample size that is used whenever the caller does not provide one.
   */
  static final int DEFAULT_SAMPLE_SIZE = 5000;

  /**
   * Creates statistics for the given graph using {@link #DEFAULT_SAMPLE_SIZE}.
   *
   * @param g graph to create the statistics for
   * @return statistics for the graph
   * @throws Exception if creating the statistics fails
   */
  @Override
  public BinningTemporalGraphStatistics fromGraph(TemporalGraph g) throws Exception {
    return fromGraphWithSampling(g, DEFAULT_SAMPLE_SIZE);
  }

  /**
   * Creates statistics for the given graph using {@link #DEFAULT_SAMPLE_SIZE} and the
   * given property sets.
   *
   * @param g                     graph to create the statistics for
   * @param numericalProperties   numerical property keys, forwarded unchanged
   * @param categoricalProperties categorical property keys, forwarded unchanged
   * @return statistics for the graph
   * @throws Exception if creating the statistics fails
   */
  @Override
  public BinningTemporalGraphStatistics fromGraph(TemporalGraph g, Set<String> numericalProperties,
                                                  Set<String> categoricalProperties) throws Exception {
    return fromGraphWithSampling(g, DEFAULT_SAMPLE_SIZE, numericalProperties, categoricalProperties);
  }

  /**
   * Creates statistics for the given graph without restricting the properties
   * ({@code null} is forwarded for both property sets).
   *
   * @param g          graph to create the statistics for
   * @param sampleSize requested sample size, forwarded unchanged
   * @return statistics for the graph
   * @throws Exception if creating the statistics fails
   */
  @Override
  public BinningTemporalGraphStatistics fromGraphWithSampling(TemporalGraph g, int sampleSize)
    throws Exception {
    return fromGraphWithSampling(g, sampleSize, null, null);
  }

  /**
   * Creates statistics for the given graph.
   * <p>
   * Two Flink pipelines (one for vertices, one for edges) are defined, both writing into
   * local lists, and are then run by a single call to {@code execute()} on the graph's
   * execution environment.
   * <p>
   * NOTE(review): {@code sampleSize} is not referenced anywhere in this method body;
   * whether sampling is configured elsewhere cannot be seen from here - TODO confirm.
   *
   * @param g                     graph to create the statistics for
   * @param sampleSize            requested sample size (currently unused in this method)
   * @param numericalProperties   numerical property keys handed to {@link ElementsToStats};
   *                              may be {@code null}
   * @param categoricalProperties categorical property keys handed to {@link ElementsToStats};
   *                              may be {@code null}
   * @return statistics built from the collected vertex and edge statistics
   * @throws Exception if executing the Flink job fails
   */
  @Override
  public BinningTemporalGraphStatistics fromGraphWithSampling(TemporalGraph g, int sampleSize,
                                                              Set<String> numericalProperties,
                                                              Set<String> categoricalProperties)
    throws Exception {
    // sinks that are filled once the job has been executed
    List<TemporalElementStats> collectedVertexStats = Lists.newArrayList();
    List<TemporalElementStats> collectedEdgeStats = Lists.newArrayList();

    g.getVertices()
      .groupBy(EPGMElement::getLabel)
      .reduceGroup(new ElementsToStats<>(numericalProperties, categoricalProperties))
      .output(new LocalCollectionOutputFormat<>(collectedVertexStats));

    g.getEdges()
      // the key selector must stay a lambda here, do NOT turn it into a method reference!
      .groupBy(edge -> edge.getLabel())
      .reduceGroup(new ElementsToStats<>(numericalProperties, categoricalProperties))
      .output(new LocalCollectionOutputFormat<>(collectedEdgeStats));

    // runs both pipelines defined above
    g.getConfig().getExecutionEnvironment().execute();

    return new BinningTemporalGraphStatistics(collectedVertexStats, collectedEdgeStats,
      mergeRelevantProperties(numericalProperties, categoricalProperties));
  }

  /**
   * Merges both property sets into one set of relevant properties.
   * <p>
   * If at least one of the arguments is {@code null}, {@code null} is returned. According
   * to the original author's note, {@code null} stands for "consider all properties";
   * pass empty sets to ignore all properties.
   *
   * @param numericalProperties   numerical property keys, may be {@code null}
   * @param categoricalProperties categorical property keys, may be {@code null}
   * @return union of both sets, or {@code null} if any of them is {@code null}
   */
  private static HashSet<String> mergeRelevantProperties(Set<String> numericalProperties,
                                                         Set<String> categoricalProperties) {
    if (numericalProperties == null || categoricalProperties == null) {
      return null;
    }
    HashSet<String> merged = new HashSet<>(numericalProperties);
    merged.addAll(categoricalProperties);
    return merged;
  }
}