// CreateDictionary.java
/*
* Copyright © 2014 - 2021 Leipzig University (Database Research Group)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.gradoop.flink.algorithms.fsm.dimspan.functions.preprocessing;
import com.google.common.collect.Lists;
import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.util.Collector;
import org.gradoop.flink.algorithms.fsm.dimspan.comparison.LabelComparator;
import org.gradoop.flink.model.impl.tuples.WithCount;
import java.util.List;
/**
 * {@code (label, frequency),.. => [label,..]}
 * <p>
 * Collects all counted labels of a group, orders them with the configured comparator and
 * emits the bare labels, in that order, as a single array.
 */
public class CreateDictionary implements GroupReduceFunction<WithCount<String>, String[]> {

  /**
   * Comparator that defines the position of each label within the emitted dictionary.
   */
  private final LabelComparator comparator;

  /**
   * Constructor.
   *
   * @param comparator label comparator used to order the counted labels
   */
  public CreateDictionary(LabelComparator comparator) {
    this.comparator = comparator;
  }

  /**
   * Sorts the incoming counted labels and emits exactly one array holding the labels in
   * sorted order.
   *
   * @param iterable  counted labels of the group
   * @param collector receives the resulting label array
   * @throws Exception declared by the implemented interface
   */
  @Override
  public void reduce(
    Iterable<WithCount<String>> iterable, Collector<String[]> collector) throws Exception {

    // Materialize the group so it can be ordered by the configured comparator.
    List<WithCount<String>> sortedLabels = Lists.newArrayList(iterable);
    sortedLabels.sort(comparator);

    // Copy the labels into the output array, keeping the sorted order; counts are dropped.
    String[] dictionary = new String[sortedLabels.size()];
    int position = 0;
    for (WithCount<String> labelWithCount : sortedLabels) {
      dictionary[position++] = labelWithCount.getObject();
    }

    collector.collect(dictionary);
  }
}