// BuildTuplesFromElements.java
/*
* Copyright © 2014 - 2021 Leipzig University (Database Research Group)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.gradoop.flink.model.impl.operators.keyedgrouping.functions;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.typeutils.ResultTypeQueryable;
import org.apache.flink.api.java.typeutils.TupleTypeInfo;
import org.gradoop.common.model.api.entities.Element;
import org.gradoop.common.model.impl.id.GradoopId;
import org.gradoop.common.model.impl.properties.PropertyValue;
import org.gradoop.flink.model.api.functions.AggregateFunction;
import org.gradoop.flink.model.api.functions.KeyFunction;
import java.util.List;
import java.util.Objects;
import static org.gradoop.common.model.impl.properties.PropertyValue.NULL_VALUE;
/**
* Build a tuple-based representation of elements for grouping.
* Tuples will contain some Gradoop IDs, all grouping keys followed by all properties to be
* aggregated.
* <p>
* <i>Note: </i> This function sets all grouping keys and aggregate values, make sure to set
* additional fields, if {@code tupleDataOffset} is not {@code 0}.
*
* @param <E> The element type.
*/
public class BuildTuplesFromElements<E extends Element>
implements MapFunction<E, Tuple>, ResultTypeQueryable<Tuple> {
/**
* The grouping key functions.
*/
private final List<KeyFunction<E, ?>> keys;
/**
* The aggregate functions.
*/
private final List<AggregateFunction> aggregateFunctions;
/**
* The number of fields to be reserved for IDs.
* Those fields will be of type {@link GradoopId}.
*/
private final int tupleDataOffset;
/**
* The types of the produced tuple.
*/
private final TypeInformation<?>[] elementTypes;
/**
* Reduce object instantiations.
*/
private final Tuple reuseTuple;
/**
* Initialize this function, setting the grouping keys and aggregate functions.
*
* @param tupleDataOffset The number of tuple fields reserved for IDs.
* @param keys The grouping keys.
* @param aggregateFunctions The aggregate functions used to determine the aggregate property
*/
public BuildTuplesFromElements(int tupleDataOffset, List<KeyFunction<E, ?>> keys,
List<AggregateFunction> aggregateFunctions) {
this.tupleDataOffset = tupleDataOffset;
if (tupleDataOffset < 0) {
throw new IllegalArgumentException("The number of reserved tuple fields must not be negative.");
}
this.keys = Objects.requireNonNull(keys);
this.aggregateFunctions = Objects.requireNonNull(aggregateFunctions);
final int tupleSize = tupleDataOffset + keys.size() + aggregateFunctions.size();
if (tupleSize > Tuple.MAX_ARITY) {
throw new UnsupportedOperationException("Number of elements is too high for tuple: " + tupleSize +
" (max.: " + Tuple.MAX_ARITY + ")");
}
elementTypes = new TypeInformation[tupleSize];
for (int i = 0; i < tupleDataOffset; i++) {
elementTypes[i] = TypeInformation.of(GradoopId.class);
}
// Fill grouping key types.
for (int i = 0; i < keys.size(); i++) {
elementTypes[i + tupleDataOffset] = keys.get(i).getType();
}
// Fill remaining spots with property value types.
for (int i = 0; i < aggregateFunctions.size(); i++) {
elementTypes[i + keys.size() + tupleDataOffset] = TypeInformation.of(PropertyValue.class);
}
reuseTuple = Tuple.newInstance(tupleSize);
// Fill first fields with default ID values.
for (int i = 0; i < tupleDataOffset; i++) {
reuseTuple.setField(GradoopId.NULL_VALUE, i);
}
}
@Override
public TypeInformation<Tuple> getProducedType() {
return new TupleTypeInfo<>(elementTypes);
}
@Override
public Tuple map(E element) throws Exception {
int field = tupleDataOffset;
for (KeyFunction<? super E, ?> key : keys) {
reuseTuple.setField(key.getKey(element), field);
field++;
}
for (AggregateFunction aggregateFunction : aggregateFunctions) {
final PropertyValue increment = aggregateFunction.getIncrement(element);
reuseTuple.setField(increment == null ? NULL_VALUE : increment, field);
field++;
}
return reuseTuple;
}
}