GraphStatisticsReader.java
/*
* Copyright © 2014 - 2021 Leipzig University (Database Research Group)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.gradoop.flink.model.impl.operators.matching.common.statistics;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
* Base class for reading a {@link GraphStatistics} object from file system.
*/
public abstract class GraphStatisticsReader {
/**
* Separates tokens in a single line
*/
public static final String TOKEN_SEPARATOR = ",";
/**
* Single line containing the total vertex count, e.g.
*
* BOF
* 23
* EOF
*/
public static final String FILE_VERTEX_COUNT = "vertex_count";
/**
* Single line containing the total edge count, e.g.
*
* BOF
* 42
* EOF
*/
public static final String FILE_EDGE_COUNT = "edge_count";
/**
* Each line contains the label and its count, e.g.
*
* BOF
* Person,12
* University,11
* EOF
*/
public static final String FILE_VERTEX_COUNT_BY_LABEL = "vertex_count_by_label";
/**
* Each line contains a vertex and its degree, e.g.
*
* BOF
* v1,6
* v2,12
* EOF
*/
public static final String FILE_VERTEX_DEGREE_DISTRIBUTION = "vertex_degree_distribution";
/**
* Each line contains a vertex and its in-degree, e.g.
*
* BOF
* v1,3
* v2,4
* EOF
*/
public static final String INCOMING_VERTEX_DEGREE_DISTRIBUTION =
"incoming_vertex_degree_distribution";
/**
* Each line contains a vertex and its out-degree, e.g.
*
* BOF
* v1,3
* v2,8
* EOF
*/
public static final String OUTGOING_VERTEX_DEGREE_DISTRIBUTION =
"outgoing_vertex_degree_distribution";
/**
* Each line contains the label and its count, e.g.
*
* BOF
* knows,30
* studyAt,12
* EOF
*/
public static final String FILE_EDGE_COUNT_BY_LABEL = "edge_count_by_label";
/**
* Each line contains the source vertex label, the edge label and the frequency., e.g.
*
* BOF
* Person,knows,30
* Person,studyAt,12
* EOF
*/
public static final String FILE_EDGE_COUNT_BY_SOURCE_VERTEX_AND_EDGE_LABEL =
"edge_count_by_source_vertex_and_edge_label";
/**
* Each line contains the target vertex label, the edge label and the frequency, e.g.
*
* BOF
* Person,knows,30
* University,studyAt,12
* EOF
*/
public static final String FILE_EDGE_COUNT_BY_TARGET_VERTEX_AND_EDGE_LABEL =
"edge_count_by_target_vertex_and_edge_label";
/**
* One line containing the number of distinct source vertices, e.g.
*
* BOF
* 23
* EOF
*/
public static final String FILE_DISTINCT_SOURCE_VERTEX_COUNT = "distinct_source_vertex_count";
/**
* One line containing the number of distinct target vertices, e.g.
*
* BOF
* 42
* EOF
*/
public static final String FILE_DISTINCT_TARGET_VERTEX_COUNT = "distinct_target_vertex_count";
/**
* Each line contains the edge label and the number of distinct source ids, e.g.
*
* BOF
* knows,10
* studyAt,12
* EOF
*/
public static final String FILE_DISTINCT_SOURCE_VERTEX_COUNT_BY_EDGE_LABEL =
"distinct_source_vertex_count_by_edge_label";
/**
* Each line contains the edge label and the number of distinct target ids, e.g.
*
* BOF
* knows,10
* studyAt,12
* EOF
*/
public static final String FILE_DISTINCT_TARGET_VERTEX_COUNT_BY_EDGE_LABEL =
"distinct_target_vertex_count_by_edge_label";
/**
* Each line contains the edge label a property name and the number of distinct property
* values for that pair, e.g.
*
* BOF
* knows,since,73
* connecting,isActive,2
* EOF
*/
public static final String FILE_DISTINCT_EDGE_PROPERTIES_BY_LABEL =
"distinct_edge_properties_by_label";
/**
* Each line contains the vertex label a property name and the number of distinct property
* values for that pair, e.g.
*
* BOF
* Person,age,100
* City,name,25
* EOF
*/
public static final String FILE_DISTINCT_VERTEX_PROPERTIES_BY_LABEL =
"distinct_vertex_properties_by_label";
/**
* Each line contains the edge property name and the number of distinct property
* values for that value
*
* BOF
* since,73
* isActive,2
* EOF
*/
public static final String FILE_DISTINCT_EDGE_PROPERTIES = "distinct_edge_properties";
/**
* Each line contains the vertex property name and the number of distinct property
* values for that value
*
* BOF
* age,100
* name,25
* EOF
*/
public static final String FILE_DISTINCT_VERTEX_PROPERTIES = "distinct_vertex_properties";
/**
* Reads a single {@link Long} value from the specified file.
*
* @param lines stream of lines in the file
* @return long value in first line of file
* @throws IOException if an I/O error occurs opening the file
*/
static Long readSingleValue(Stream<String> lines) throws IOException {
return lines
.map(Long::parseLong)
.collect(Collectors.toList())
.get(0);
}
/**
* Reads a key value map from the specified file.
*
* @param lines stream of lines in the file
* @return key value map
* @throws IOException if an I/O error occurs opening the file
*/
static Map<String, Long> readKeyValueMap(Stream<String> lines) throws IOException {
return lines
.map(s -> s.split(TOKEN_SEPARATOR))
.collect(Collectors.toMap(tokens -> tokens[0], tokens -> Long.parseLong(tokens[1])));
}
/**
* Reads a key value mapped from the given file grouped by the first token in each line.
*
* @param lines stream of lines in the file
* @return nested key value map
* @throws IOException if an I/O error occurs opening the file
*/
static Map<String, Map<String, Long>> readNestedKeyValueMap(Stream<String> lines)
throws IOException {
final Map<String, Map<String, Long>> mapping = new HashMap<>();
lines
.map(line -> line.split(TOKEN_SEPARATOR))
.forEach(tokens -> {
String vertexLabel = tokens[0];
String edgeLabel = tokens[1];
Long edgeCount = Long.parseLong(tokens[2]);
if (mapping.containsKey(vertexLabel)) {
mapping.get(vertexLabel).put(edgeLabel, edgeCount);
} else {
Map<String, Long> value = new HashMap<>();
value.put(edgeLabel, edgeCount);
mapping.put(vertexLabel, value);
}
});
return mapping;
}
}