GDLEncoder.java

/*
 * Copyright © 2014 - 2021 Leipzig University (Database Research Group)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gradoop.flink.io.impl.gdl;

import org.gradoop.common.model.api.entities.Edge;
import org.gradoop.common.model.api.entities.GraphHead;
import org.gradoop.common.model.api.entities.Vertex;
import org.gradoop.common.model.impl.id.GradoopId;
import org.gradoop.common.model.impl.properties.Properties;
import org.gradoop.common.model.impl.properties.Property;
import org.gradoop.common.model.impl.properties.PropertyValue;

import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Encodes data structures using the GDL format.
 *
 * @param <G> graph head type
 * @param <V> vertex type
 * @param <E> edge type
 */
public class GDLEncoder<G extends GraphHead, V extends Vertex, E extends Edge> {

  /**
   * Marks the beginning of the definition of vertices and edges.
   */
  private static final String GRAPH_ELEMENTS_DEFINITION_START = "[";
  /**
   * Marks the end of the definition of vertices and edges.
   */
  private static final String GRAPH_ELEMENTS_DEFINITION_END = "]";
  /**
   * Graph variable prefix.
   */
  private static final String GRAPH_VARIABLE_PREFIX = "g";
  /**
   * Vertex variable prefix.
   */
  private static final String VERTEX_VARIABLE_PREFIX = "v";
  /**
   * Edge variable prefix.
   */
  private static final String EDGE_VARIABLE_PREFIX = "e";
  /**
   * Marks the beginning of the properties string.
   */
  private static final String PROPERTIES_PREFIX = "{";
  /**
   * Marks the end of the properties string.
   */
  private static final String PROPERTIES_SUFFIX = "}";
  /**
   * Separates properties.
   */
  private static final String PROPERTIES_SEPARATOR = ",";
  /**
   * Separates key and value for properties.
   */
  private static final String KEY_VALUE_SEPARATOR = ":";
  /**
   * Suffix for GDL double representation.
   */
  private static final String DOUBLE_SUFFIX = "d";
  /**
   * Suffix for GDL float representation.
   */
  private static final String FLOAT_SUFFIX = "f";
  /**
   * Suffix for GDL long representation.
   */
  private static final String LONG_SUFFIX = "L";
  /**
   * GDL null representation.
   */
  private static final String NULL_STRING = "NULL";
  /**
   * GDL string prefix.
   */
  private static final String STRING_PREFIX = "\"";
  /**
   * GDL string suffix.
   */
  private static final String STRING_SUFFIX = "\"";

  /**
   * Graph heads to encode.
   */
  private final List<G> graphHeads;
  /**
   * Vertices to encode.
   */
  private final List<V> vertices;
  /**
   * Edges to encode.
   */
  private final List<E> edges;

  /**
   * Creates a GDLEncoder using the passed parameters. The lists are stored by reference and
   * are not copied.
   *
   * @param graphHeads graph heads that should be encoded
   * @param vertices vertices that should be encoded
   * @param edges edges that should be encoded
   */
  public GDLEncoder(List<G> graphHeads, List<V> vertices, List<E> edges) {
    this.graphHeads = graphHeads;
    this.vertices = vertices;
    this.edges = edges;
  }

  /**
   * Creates a GDL formatted string from the graph heads, vertices and edges.
   * <p>
   * One section is written per graph head, in list order. Each section lists the vertices and
   * then the edges whose graph ids contain the id of that graph head. An element is written with
   * label and properties the first time it appears and by variable name only afterwards.
   * Elements that belong to none of the given graph heads are not written.
   *
   * @return GDL formatted string
   */
  public String getGDLString() {
    Map<GradoopId, String> idToGraphHeadName = getGraphHeadNameMapping(graphHeads);
    Map<GradoopId, String> idToVertexName = getVertexNameMapping(vertices);
    Map<GradoopId, String> idToEdgeName = getEdgeNameMapping(edges);

    // Ids of elements that were already written in full, shared across all graph sections.
    Set<GradoopId> usedVertexIds = new HashSet<>();
    Set<GradoopId> usedEdgeIds = new HashSet<>();

    String lineSeparator = System.lineSeparator();
    StringBuilder result = new StringBuilder();

    for (G graphHead : graphHeads) {
      GradoopId graphId = graphHead.getId();
      StringBuilder verticesString = new StringBuilder();
      StringBuilder edgesString = new StringBuilder();

      for (V vertex : vertices) {
        if (vertex.getGraphIds().contains(graphId)) {
          // Set#add returns true only if the id was not present before, i.e. on first occurrence.
          boolean firstOccurrence = usedVertexIds.add(vertex.getId());
          verticesString
            .append(vertexToGDLString(vertex, idToVertexName, firstOccurrence))
            .append(lineSeparator);
        }
      }

      for (E edge : edges) {
        if (edge.getGraphIds().contains(graphId)) {
          boolean firstOccurrence = usedEdgeIds.add(edge.getId());
          edgesString
            .append(edgeToGDLString(edge, idToVertexName, idToEdgeName, firstOccurrence))
            .append(lineSeparator);
        }
      }

      result
        .append(graphHeadToGDLString(graphHead, idToGraphHeadName))
        .append(GRAPH_ELEMENTS_DEFINITION_START).append(lineSeparator)
        .append(verticesString)
        // A blank line separates vertices from edges, but only if there are edges to show.
        .append(edgesString.length() > 0 ? lineSeparator : "")
        .append(edgesString)
        .append(GRAPH_ELEMENTS_DEFINITION_END)
        .append(lineSeparator).append(lineSeparator);
    }
    return result.toString();
  }

  /**
   * Returns a mapping between the graph heads gradoop ids and the GDL variable names.
   * The variable name is the graph prefix followed by the position in the list.
   *
   * @param graphHeads The graph heads.
   * @return Mapping between graph head and GDL variable name.
   */
  private Map<GradoopId, String> getGraphHeadNameMapping(List<G> graphHeads) {
    Map<GradoopId, String> idToGraphHeadName = new HashMap<>(graphHeads.size());
    int index = 0;
    for (G graphHead : graphHeads) {
      idToGraphHeadName.put(graphHead.getId(), GRAPH_VARIABLE_PREFIX + index);
      index++;
    }
    return idToGraphHeadName;
  }

  /**
   * Returns a mapping between the vertex GradoopID and the GDL variable name.
   * The variable name has the form {@code v_<label>_<position in list>}.
   *
   * @param vertices The graph vertices.
   * @return Mapping between vertex and GDL variable name.
   */
  private Map<GradoopId, String> getVertexNameMapping(List<V> vertices) {
    Map<GradoopId, String> idToVertexName = new HashMap<>(vertices.size());
    int index = 0;
    for (V vertex : vertices) {
      String vName = VERTEX_VARIABLE_PREFIX + "_" + vertex.getLabel() + "_" + index;
      idToVertexName.put(vertex.getId(), vName);
      index++;
    }
    return idToVertexName;
  }

  /**
   * Returns a mapping between the edge GradoopId and the GDL variable name.
   * The variable name has the form {@code e_<label>_<position in list>}.
   *
   * @param edges The graph edges.
   * @return Mapping between edge and GDL variable name.
   */
  private Map<GradoopId, String> getEdgeNameMapping(List<E> edges) {
    Map<GradoopId, String> idToEdgeName = new HashMap<>(edges.size());
    int index = 0;
    for (E edge : edges) {
      String eName = EDGE_VARIABLE_PREFIX + "_" + edge.getLabel() + "_" + index;
      idToEdgeName.put(edge.getId(), eName);
      index++;
    }
    return idToEdgeName;
  }

  /**
   * Returns a GDL formatted graph head string consisting of variable name, label and properties.
   *
   * @param graphHead graph head
   * @param idToGraphHeadName mapping from graph head id to its GDL variable name
   * @return GDL formatted string
   */
  private String graphHeadToGDLString(G graphHead, Map<GradoopId, String> idToGraphHeadName) {
    return String.format("%s:%s %s",
      idToGraphHeadName.get(graphHead.getId()),
      graphHead.getLabel(),
      propertiesToGDLString(graphHead.getProperties()));
  }

  /**
   * Returns the gdl formatted vertex including the properties and the label on first occurrence
   * or otherwise just the variable name.
   *
   * @param vertex The vertex that should be formatted.
   * @param idToVertexName Maps GradoopId of a vertex to a string that represents the gdl
   *                       variable name
   * @param firstOccurrence Is it the first occurrence of the vertex in all graphs?
   * @return A GDL formatted vertex string.
   */
  private String vertexToGDLString(V vertex, Map<GradoopId, String> idToVertexName,
    boolean firstOccurrence) {
    String variableName = idToVertexName.get(vertex.getId());
    if (!firstOccurrence) {
      return String.format("(%s)", variableName);
    }
    return String.format("(%s:%s %s)",
      variableName,
      vertex.getLabel(),
      propertiesToGDLString(vertex.getProperties()));
  }

  /**
   * Returns the GDL formatted edge, including the properties and the label on first occurrence
   * or otherwise just the variable name. Source and target vertex are always referenced by
   * variable name only.
   *
   * @param edge The edge to be formatted.
   * @param idToVertexName Maps GradoopId of a vertex to a string that represents the GDL
   *                       variable name
   * @param idToEdgeName Maps GradoopId of an edge to a string that represents the GDL variable
   *                     name.
   * @param firstOccurrence Is it the first occurrence of the edge in all graphs?
   * @return A GDL formatted edge string.
   */
  private String edgeToGDLString(E edge, Map<GradoopId, String> idToVertexName,
    Map<GradoopId, String> idToEdgeName, boolean firstOccurrence) {
    String sourceName = idToVertexName.get(edge.getSourceId());
    String targetName = idToVertexName.get(edge.getTargetId());
    String edgeName = idToEdgeName.get(edge.getId());

    if (!firstOccurrence) {
      return String.format("(%s)-[%s]->(%s)", sourceName, edgeName, targetName);
    }
    return String.format("(%s)-[%s:%s%s]->(%s)",
      sourceName,
      edgeName,
      edge.getLabel(),
      propertiesToGDLString(edge.getProperties()),
      targetName);
  }

  /**
   * Returns the properties as a GDL formatted String.
   *
   * @param properties The properties to be formatted, may be {@code null}.
   * @return A GDL formatted string that represents the properties, or the empty string if there
   *         are no properties.
   */
  private String propertiesToGDLString(Properties properties) {
    if (properties == null || properties.isEmpty()) {
      return "";
    }
    return properties.toList().stream()
      .map(this::propertyToGDLString)
      .collect(Collectors.joining(PROPERTIES_SEPARATOR, PROPERTIES_PREFIX, PROPERTIES_SUFFIX));
  }

  /**
   * Returns this property as a GDL formatted String of the form {@code key:value}.
   * <p>
   * String values are quoted, null values are written as {@code NULL}, and double, float and
   * long values get their type suffix. All other values are written using their
   * {@code toString()} representation.
   *
   * @param property The property.
   * @return A GDL formatted string that represents the property.
   */
  private String propertyToGDLString(Property property) {
    StringBuilder result = new StringBuilder()
      .append(property.getKey())
      .append(KEY_VALUE_SEPARATOR);

    PropertyValue value = property.getValue();

    if (value.isString()) {
      // Embedded quotes must not terminate the string literal early.
      result
        .append(STRING_PREFIX)
        .append(escapeStringValue(value.toString()))
        .append(STRING_SUFFIX);
    } else if (value.isNull()) {
      result.append(NULL_STRING);
    } else if (value.isDouble()) {
      result.append(value.toString()).append(DOUBLE_SUFFIX);
    } else if (value.isFloat()) {
      result.append(value.toString()).append(FLOAT_SUFFIX);
    } else if (value.isLong()) {
      result.append(value.toString()).append(LONG_SUFFIX);
    } else {
      result.append(value.toString());
    }

    return result.toString();
  }

  /**
   * Prepares a raw string value for use inside a double-quoted GDL string literal by putting a
   * backslash in front of every double quote that is not already preceded by one.
   * <p>
   * Values without an unescaped double quote are returned unchanged, so the output for strings
   * that could be written safely before stays the same.
   * NOTE(review): assumes the GDL lexer accepts {@code \"} inside a string literal; whether the
   * parser removes the backslash again when reading should be confirmed against the GDL grammar.
   *
   * @param raw The raw string value.
   * @return The value with unescaped double quotes escaped.
   */
  private static String escapeStringValue(String raw) {
    if (raw.indexOf('"') < 0) {
      return raw;
    }
    StringBuilder escaped = new StringBuilder(raw.length() + 2);
    for (int i = 0; i < raw.length(); i++) {
      char current = raw.charAt(i);
      boolean alreadyEscaped = i > 0 && raw.charAt(i - 1) == '\\';
      if (current == '"' && !alreadyEscaped) {
        escaped.append('\\');
      }
      escaped.append(current);
    }
    return escaped.toString();
  }
}