AsciiGraphLoader.java
/*
* Copyright © 2014 - 2021 Leipzig University (Database Research Group)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.gradoop.common.util;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.gradoop.common.model.api.entities.ElementFactoryProvider;
import org.gradoop.common.model.api.entities.GraphHead;
import org.gradoop.common.model.api.entities.Edge;
import org.gradoop.common.model.api.entities.Vertex;
import org.gradoop.common.model.impl.id.GradoopId;
import org.gradoop.common.model.impl.id.GradoopIdSet;
import org.gradoop.common.model.impl.properties.Properties;
import org.gradoop.gdl.GDLHandler;
import org.gradoop.gdl.exceptions.BailSyntaxErrorStrategy;
import org.gradoop.gdl.model.Graph;
import org.gradoop.gdl.model.GraphElement;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collection;
import java.util.Map;
/**
 * Creates collections of graphs, vertices and edges from a given GDL script.
 *
 * Instances are obtained through the static factory methods
 * {@link #fromString(String, ElementFactoryProvider)},
 * {@link #fromFile(String, ElementFactoryProvider)} and
 * {@link #fromStream(InputStream, ElementFactoryProvider)}. Further GDL can be
 * added to an existing loader via {@link #appendFromString(String)}.
 *
 * @see <a href="https://github.com/dbs-leipzig/gdl">GDL on GitHub</a>
 *
 * @param <G> graph head type
 * @param <V> vertex type
 * @param <E> edge type
 */
public class AsciiGraphLoader<G extends GraphHead, V extends Vertex, E extends Edge> {

  /**
   * Factory provider for graph elements.
   */
  private final ElementFactoryProvider<G, V, E> elementFactoryProvider;

  /**
   * Used to parse GDL scripts.
   */
  private final GDLHandler gdlHandler;

  /**
   * Stores all graphs contained in the GDL script.
   */
  private final Map<GradoopId, G> graphHeads;

  /**
   * Mapping between GDL graph ids and Gradoop IDs.
   */
  private final Map<Long, GradoopId> graphHeadIds;

  /**
   * Stores all vertices contained in the GDL script.
   */
  private final Map<GradoopId, V> vertices;

  /**
   * Mapping between GDL vertex ids and Gradoop IDs.
   */
  private final Map<Long, GradoopId> vertexIds;

  /**
   * Stores all edges contained in the GDL script.
   */
  private final Map<GradoopId, E> edges;

  /**
   * Mapping between GDL edge ids and Gradoop IDs.
   */
  private final Map<Long, GradoopId> edgeIds;

  /**
   * Stores graphs that are assigned to a variable.
   */
  private final Map<String, G> graphHeadCache;

  /**
   * Stores vertices that are assigned to a variable.
   */
  private final Map<String, V> vertexCache;

  /**
   * Stores edges that are assigned to a variable.
   */
  private final Map<String, E> edgeCache;

  /**
   * Creates a new AsciiGraphLoader and immediately initializes all graph heads,
   * vertices and edges known to the given handler.
   *
   * @param gdlHandler GDL Handler
   * @param elementFactoryProvider Factory provider for EPGM elements.
   */
  private AsciiGraphLoader(GDLHandler gdlHandler,
    ElementFactoryProvider<G, V, E> elementFactoryProvider) {
    this.gdlHandler = gdlHandler;
    this.elementFactoryProvider = elementFactoryProvider;

    this.graphHeads = Maps.newHashMap();
    this.vertices = Maps.newHashMap();
    this.edges = Maps.newHashMap();

    this.graphHeadIds = Maps.newHashMap();
    this.vertexIds = Maps.newHashMap();
    this.edgeIds = Maps.newHashMap();

    this.graphHeadCache = Maps.newHashMap();
    this.vertexCache = Maps.newHashMap();
    this.edgeCache = Maps.newHashMap();

    init();
  }

  /**
   * Creates an AsciiGraphLoader from the given ASCII GDL string.
   *
   * @param asciiGraph GDL string
   * @param elementFactoryProvider Factory provider for graph elements.
   * @param <G> graph head type
   * @param <V> vertex type
   * @param <E> edge type
   *
   * @return AsciiGraphLoader
   */
  public static
  <G extends GraphHead, V extends Vertex, E extends Edge>
  AsciiGraphLoader<G, V, E> fromString(String asciiGraph,
    ElementFactoryProvider<G, V, E> elementFactoryProvider) {
    return new AsciiGraphLoader<>(
      createDefaultBuilder().buildFromString(asciiGraph),
      elementFactoryProvider);
  }

  /**
   * Creates an AsciiGraphLoader from the given ASCII GDL file.
   *
   * @param fileName File that contains a GDL script
   * @param elementFactoryProvider Factory provider for graph elements.
   * @param <G> graph head type
   * @param <V> vertex type
   * @param <E> edge type
   *
   * @return AsciiGraphLoader
   * @throws IOException on failure
   */
  public static
  <G extends GraphHead, V extends Vertex, E extends Edge>
  AsciiGraphLoader<G, V, E> fromFile(String fileName,
    ElementFactoryProvider<G, V, E> elementFactoryProvider)
    throws IOException {
    return new AsciiGraphLoader<>(
      createDefaultBuilder().buildFromFile(fileName),
      elementFactoryProvider);
  }

  /**
   * Creates an AsciiGraphLoader from the given input stream of ASCII GDL.
   *
   * @param inputStream Stream that provides a GDL script
   * @param elementFactoryProvider Factory provider for graph elements.
   * @param <G> graph head type
   * @param <V> vertex type
   * @param <E> edge type
   *
   * @return AsciiGraphLoader
   * @throws IOException on failure
   */
  public static
  <G extends GraphHead, V extends Vertex, E extends Edge>
  AsciiGraphLoader<G, V, E> fromStream(InputStream inputStream,
    ElementFactoryProvider<G, V, E> elementFactoryProvider)
    throws IOException {
    return new AsciiGraphLoader<>(
      createDefaultBuilder().buildFromStream(inputStream),
      elementFactoryProvider);
  }

  /**
   * Creates the GDL handler builder shared by all factory methods. It is
   * configured with the Gradoop default labels for graphs, vertices and edges
   * and a {@link BailSyntaxErrorStrategy}.
   *
   * @return pre-configured GDL handler builder
   */
  private static GDLHandler.Builder createDefaultBuilder() {
    return new GDLHandler.Builder()
      .setDefaultGraphLabel(GradoopConstants.DEFAULT_GRAPH_LABEL)
      .setDefaultVertexLabel(GradoopConstants.DEFAULT_VERTEX_LABEL)
      .setDefaultEdgeLabel(GradoopConstants.DEFAULT_EDGE_LABEL)
      .setErrorStrategy(new BailSyntaxErrorStrategy());
  }

  /**
   * Appends the given ASCII GDL to the graph handled by that loader.
   *
   * Variables that were previously used, can be reused in the given script and
   * refer to the same entities.
   *
   * After appending, the loader is re-initialized: elements whose GDL id is
   * already known are kept (vertices and edges get their graph membership
   * refreshed), unknown elements are created.
   *
   * @param asciiGraph GDL string
   */
  public void appendFromString(String asciiGraph) {
    this.gdlHandler.append(asciiGraph);
    init();
  }

  // ---------------------------------------------------------------------------
  //  Graph methods
  // ---------------------------------------------------------------------------

  /**
   * Returns all GraphHeads contained in the ASCII graph.
   *
   * @return immutable snapshot of all graph heads
   */
  public Collection<G> getGraphHeads() {
    return ImmutableSet.copyOf(graphHeads.values());
  }

  /**
   * Returns GraphHead by given variable.
   *
   * @param variable variable used in GDL script
   * @return graphHead or {@code null} if graph is not cached
   */
  public G getGraphHeadByVariable(String variable) {
    // Look up the backing map directly; going through getGraphHeadCache()
    // would copy the whole cache for every single lookup.
    return graphHeadCache.get(variable);
  }

  /**
   * Returns GraphHeads by their given variables. Variables that are not bound
   * to a graph head are ignored.
   *
   * @param variables variables used in GDL script
   * @return graphHeads that are assigned to the given variables
   */
  public Collection<G> getGraphHeadsByVariables(String... variables) {
    Collection<G> result =
      Sets.newHashSetWithExpectedSize(variables.length);
    for (String variable : variables) {
      G graphHead = getGraphHeadByVariable(variable);
      if (graphHead != null) {
        result.add(graphHead);
      }
    }
    return result;
  }

  // ---------------------------------------------------------------------------
  //  Vertex methods
  // ---------------------------------------------------------------------------

  /**
   * Returns all vertices contained in the ASCII graph.
   *
   * @return immutable snapshot of all vertices
   */
  public Collection<V> getVertices() {
    return ImmutableSet.copyOf(vertices.values());
  }

  /**
   * Returns vertex by its given variable.
   *
   * @param variable variable used in GDL script
   * @return vertex or {@code null} if not present
   */
  public V getVertexByVariable(String variable) {
    return vertexCache.get(variable);
  }

  /**
   * Returns vertices by their given variables. Variables that are not bound to
   * a vertex are ignored.
   *
   * @param variables variables used in GDL script
   * @return vertices
   */
  public Collection<V> getVerticesByVariables(String... variables) {
    Collection<V> result = Sets.newHashSetWithExpectedSize(variables.length);
    for (String variable : variables) {
      V vertex = getVertexByVariable(variable);
      if (vertex != null) {
        result.add(vertex);
      }
    }
    return result;
  }

  /**
   * Returns all vertices that belong to at least one of the given graphs.
   *
   * @param graphIds graph identifiers
   * @return vertices that are contained in the graphs
   */
  public Collection<V> getVerticesByGraphIds(GradoopIdSet graphIds) {
    Collection<V> result = Sets.newHashSetWithExpectedSize(graphIds.size());
    for (V vertex : vertices.values()) {
      if (vertex.getGraphIds().containsAny(graphIds)) {
        result.add(vertex);
      }
    }
    return result;
  }

  /**
   * Returns all vertices that belong to the given graph variables.
   *
   * @param graphVariables graph variables used in the GDL script
   * @return vertices that are contained in the graphs
   */
  public Collection<V> getVerticesByGraphVariables(String... graphVariables) {
    return getVerticesByGraphIds(getGraphIdsByVariables(graphVariables));
  }

  // ---------------------------------------------------------------------------
  //  Edge methods
  // ---------------------------------------------------------------------------

  /**
   * Returns all edges contained in the ASCII graph.
   *
   * @return immutable snapshot of all edges
   */
  public Collection<E> getEdges() {
    return ImmutableSet.copyOf(edges.values());
  }

  /**
   * Returns edge by its given variable.
   *
   * @param variable variable used in GDL script
   * @return edge or {@code null} if not present
   */
  public E getEdgeByVariable(String variable) {
    return edgeCache.get(variable);
  }

  /**
   * Returns edges by their given variables. Variables that are not bound to an
   * edge are ignored.
   *
   * @param variables variables used in GDL script
   * @return edges
   */
  public Collection<E> getEdgesByVariables(String... variables) {
    Collection<E> result = Sets.newHashSetWithExpectedSize(variables.length);
    for (String variable : variables) {
      E edge = getEdgeByVariable(variable);
      if (edge != null) {
        result.add(edge);
      }
    }
    return result;
  }

  /**
   * Returns all edges that belong to at least one of the given graphs.
   *
   * @param graphIds Graph identifiers
   * @return edges
   */
  public Collection<E> getEdgesByGraphIds(GradoopIdSet graphIds) {
    Collection<E> result = Sets.newHashSetWithExpectedSize(graphIds.size());
    for (E edge : edges.values()) {
      if (edge.getGraphIds().containsAny(graphIds)) {
        result.add(edge);
      }
    }
    return result;
  }

  /**
   * Returns all edges that belong to the given graph variables.
   *
   * @param variables graph variables used in the GDL script
   * @return edges
   */
  public Collection<E> getEdgesByGraphVariables(String... variables) {
    return getEdgesByGraphIds(getGraphIdsByVariables(variables));
  }

  // ---------------------------------------------------------------------------
  //  Caches
  // ---------------------------------------------------------------------------

  /**
   * Returns all graph heads that are bound to a variable in the GDL script.
   *
   * @return immutable copy of the variable to graphHead mapping
   */
  public Map<String, G> getGraphHeadCache() {
    return ImmutableMap.copyOf(graphHeadCache);
  }

  /**
   * Returns all vertices that are bound to a variable in the GDL script.
   *
   * @return immutable copy of the variable to vertex mapping
   */
  public Map<String, V> getVertexCache() {
    return ImmutableMap.copyOf(vertexCache);
  }

  /**
   * Returns all edges that are bound to a variable in the GDL script.
   *
   * @return immutable copy of the variable to edge mapping
   */
  public Map<String, E> getEdgeCache() {
    return ImmutableMap.copyOf(edgeCache);
  }

  // ---------------------------------------------------------------------------
  //  Private init methods
  // ---------------------------------------------------------------------------

  /**
   * Initializes the AsciiGraphLoader.
   *
   * The order matters: vertices and edges resolve their graph membership via
   * the graph head id mapping, and edges resolve their endpoints via the
   * vertex id mapping.
   */
  private void init() {
    initGraphHeads();
    initVertices();
    initEdges();
  }

  /**
   * Initializes GraphHeads and their cache. Graphs whose GDL id is already
   * mapped are not created again.
   */
  private void initGraphHeads() {
    for (Graph g : gdlHandler.getGraphs()) {
      if (!graphHeadIds.containsKey(g.getId())) {
        initGraphHead(g);
      }
    }
    for (Map.Entry<String, Graph> e : gdlHandler.getGraphCache().entrySet()) {
      updateGraphCache(e.getKey(), e.getValue());
    }
  }

  /**
   * Initializes vertices and their cache.
   */
  private void initVertices() {
    for (org.gradoop.gdl.model.Vertex v : gdlHandler.getVertices()) {
      initVertex(v);
    }
    for (Map.Entry<String, org.gradoop.gdl.model.Vertex> e :
      gdlHandler.getVertexCache().entrySet()) {
      updateVertexCache(e.getKey(), e.getValue());
    }
  }

  /**
   * Initializes edges and their cache.
   */
  private void initEdges() {
    for (org.gradoop.gdl.model.Edge e : gdlHandler.getEdges()) {
      initEdge(e);
    }
    for (Map.Entry<String, org.gradoop.gdl.model.Edge> e :
      gdlHandler.getEdgeCache().entrySet()) {
      updateEdgeCache(e.getKey(), e.getValue());
    }
  }

  /**
   * Creates a new graph head from the given GDL graph and registers it in the
   * id mapping and the graph head store.
   *
   * @param g graph from GDL Loader
   * @return graph head
   */
  private G initGraphHead(Graph g) {
    G graphHead = elementFactoryProvider.getGraphHeadFactory().createGraphHead(
      g.getLabel(), Properties.createFromMap(g.getProperties()));
    graphHeadIds.put(g.getId(), graphHead.getId());
    graphHeads.put(graphHead.getId(), graphHead);
    return graphHead;
  }

  /**
   * Creates a new Vertex from the GDL Loader or updates an existing one.
   *
   * For a vertex whose GDL id is already known, only the graph membership is
   * replaced; label and properties are left untouched.
   *
   * @param v vertex from GDL Loader
   * @return vertex
   */
  private V initVertex(org.gradoop.gdl.model.Vertex v) {
    V vertex;
    if (!vertexIds.containsKey(v.getId())) {
      vertex = elementFactoryProvider.getVertexFactory().createVertex(
        v.getLabel(),
        Properties.createFromMap(v.getProperties()),
        createGradoopIdSet(v));
      vertexIds.put(v.getId(), vertex.getId());
      vertices.put(vertex.getId(), vertex);
    } else {
      vertex = vertices.get(vertexIds.get(v.getId()));
      vertex.setGraphIds(createGradoopIdSet(v));
    }
    return vertex;
  }

  /**
   * Creates a new Edge from the GDL Loader or updates an existing one.
   *
   * For an edge whose GDL id is already known, only the graph membership is
   * replaced; label, endpoints and properties are left untouched.
   *
   * @param e edge from GDL loader
   * @return edge
   */
  private E initEdge(org.gradoop.gdl.model.Edge e) {
    E edge;
    if (!edgeIds.containsKey(e.getId())) {
      edge = elementFactoryProvider.getEdgeFactory().createEdge(
        e.getLabel(),
        vertexIds.get(e.getSourceVertexId()),
        vertexIds.get(e.getTargetVertexId()),
        Properties.createFromMap(e.getProperties()),
        createGradoopIdSet(e));
      edgeIds.put(e.getId(), edge.getId());
      edges.put(edge.getId(), edge);
    } else {
      edge = edges.get(edgeIds.get(e.getId()));
      edge.setGraphIds(createGradoopIdSet(e));
    }
    return edge;
  }

  /**
   * Updates the graph cache.
   *
   * @param variable graph variable used in GDL script
   * @param g graph from GDL loader
   */
  private void updateGraphCache(String variable, Graph g) {
    graphHeadCache.put(
      variable, graphHeads.get(graphHeadIds.get(g.getId())));
  }

  /**
   * Updates the vertex cache.
   *
   * @param variable vertex variable used in GDL script
   * @param v vertex from GDL loader
   */
  private void updateVertexCache(String variable, org.gradoop.gdl.model.Vertex v) {
    vertexCache.put(variable, vertices.get(vertexIds.get(v.getId())));
  }

  /**
   * Updates the edge cache.
   *
   * @param variable edge variable used in the GDL script
   * @param e edge from GDL loader
   */
  private void updateEdgeCache(String variable, org.gradoop.gdl.model.Edge e) {
    edgeCache.put(variable, edges.get(edgeIds.get(e.getId())));
  }

  /**
   * Collects the ids of all graph heads that are bound to the given graph
   * variables. Variables that are not bound to a graph head are ignored.
   *
   * @param graphVariables graph variables used in the GDL script
   * @return ids of the graph heads assigned to the given variables
   */
  private GradoopIdSet getGraphIdsByVariables(String... graphVariables) {
    GradoopIdSet graphIds = new GradoopIdSet();
    for (G graphHead : getGraphHeadsByVariables(graphVariables)) {
      graphIds.add(graphHead.getId());
    }
    return graphIds;
  }

  /**
   * Creates a {@code GradoopIDSet} from the long identifiers stored at the
   * given graph element.
   *
   * @param e graph element
   * @return GradoopIDSet for the given element
   */
  private GradoopIdSet createGradoopIdSet(GraphElement e) {
    GradoopIdSet result = new GradoopIdSet();
    for (Long graphId : e.getGraphs()) {
      result.add(graphHeadIds.get(graphId));
    }
    return result;
  }
}