GradoopIdSet.java

/*
 * Copyright © 2014 - 2021 Leipzig University (Database Research Group)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gradoop.common.model.impl.id;

import org.apache.flink.core.memory.DataInputView;
import org.apache.flink.core.memory.DataOutputView;
import org.apache.flink.types.Value;

import java.io.IOException;
import java.util.AbstractSet;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

/**
 * Represents a set of {@link GradoopId} instances, ignoring any duplicates.
 *
 * Note that by implementing {@link java.util.List} Flink uses the Kryo serializer for
 * (de-)serializing the list.
 *
 * @see GradoopId
 */
public class GradoopIdSet extends AbstractSet<GradoopId> implements Value {
  /**
   * Contains the set of gradoop ids.
   */
  private Set<GradoopId> ids;

  /**
   * Required default constructor for instantiation by serialization logic.
   */
  public GradoopIdSet() {
    this.ids = new HashSet<>();
  }

  /**
   * Initializes the set with the given byte array.
   *
   * @param bytes bytes representing multiple gradoop ids
   */
  private GradoopIdSet(byte[] bytes) {
    this.ids = readIds(bytes);
  }

  /**
   * Initializes the set with the given ids.
   *
   * @param ids a collection of {@link GradoopId}s
   */
  private GradoopIdSet(Collection<GradoopId> ids) {
    this.ids = new HashSet<>(ids);
  }

  /**
   * Since we will need to do operations on individual ids we need
   * to reconstruct the {@link GradoopId} instances.
   * @param bytes serialized sequence of {@link GradoopId}s
   * @return a set representation
   */
  private Set<GradoopId> readIds(byte[] bytes) {
    ids = new HashSet<>();
    for (int i = 0; i < bytes.length / GradoopId.ID_SIZE; i++) {
      byte[] idBytes = new byte[GradoopId.ID_SIZE];
      System.arraycopy(bytes, i * GradoopId.ID_SIZE, idBytes, 0, GradoopId.ID_SIZE);
      ids.add(GradoopId.fromByteArray(idBytes));
    }
    return ids;
  }

  /**
   * Serialize all ids into a byte array.
   * @param ids sequence of {@link GradoopId}s
   * @return a binary representation
   */
  private byte[] writeIds(Set<GradoopId> ids) {
    byte[] bytes = new byte[ids.size() * GradoopId.ID_SIZE];

    int i = 0;
    for (GradoopId id : ids) {
      System.arraycopy(id.toByteArray(), 0, bytes, i * GradoopId.ID_SIZE, GradoopId.ID_SIZE);
      i++;
    }
    return bytes;
  }

  /**
   * Creates a new instance from multiple GradoopIDs.
   *
   * @param ids array of gradoop ids
   * @return gradoop id set
   */
  public static GradoopIdSet fromExisting(GradoopId... ids) {
    return fromExisting(Arrays.asList(ids));
  }

  /**
   * Creates a new instance from multiple GradoopIDs.
   *
   * @param ids given ids
   * @return gradoop id set
   */
  public static GradoopIdSet fromExisting(Collection<GradoopId> ids) {
    return new GradoopIdSet(ids);
  }

  /**
   * Creates a new instance from multiple GradoopIDs represented as byte array.
   *
   * @param bytes byte array representing multiple gradoop ids
   * @return gradoop id set
   */
  public static GradoopIdSet fromByteArray(byte[] bytes) {
    return new GradoopIdSet(bytes);
  }

  /**
   * Adds the given gradoop id to the set.
   *
   * @param id the id to add
   */
  @Override
  public boolean add(GradoopId id) {
    return this.ids.add(id);
  }

  /**
   * Adds the given gradoop ids to the set.
   *
   * @param ids the ids to add
   */
  public void addAll(GradoopIdSet ids) {
    this.ids.addAll(ids.ids);
  }

  /**
   * Adds the given gradoop ids to the set.
   *
   * @param ids the ids to add
   */
  @Override
  public boolean addAll(Collection<? extends GradoopId> ids) {
    return this.ids.addAll(ids);
  }

  /**
   * Checks if the given id is contained in the set.
   *
   * @param identifier the id to look for
   * @return true, iff the given id is in the set
   */
  @Override
  public boolean contains(Object identifier) {
    return this.ids.contains(identifier);
  }

  /**
   * Checks if the specified ids are contained in the set.
   *
   * @param other the ids to look for
   * @return true, iff all specified ids are contained in the set
   */
  public boolean containsAll(GradoopIdSet other) {
    if (other.size() > this.size()) {
      return false;
    }
    for (GradoopId id : other) {
      if (!this.contains(id)) {
        return false;
      }
    }
    return true;
  }

  /**
   * Checks if the specified ids are contained in the set.
   *
   * @param other the ids to look for
   * @return true, iff all specified ids are contained in the set
   */
  @Override
  public boolean containsAll(Collection<?> other) {
    if (other instanceof Set && other.size() > this.size()) {
      return false;
    }
    for (Object id : other) {
      if (!this.contains(id)) {
        return false;
      }
    }
    return true;
  }

  /**
   * Checks if any of the specified ids is contained in the set.
   *
   * @param other the ids to look for
   * @return true, iff any of the specified ids is contained in the set
   */
  public boolean containsAny(GradoopIdSet other) {
    // Algorithm: the sizes of both lists might be vastly different
    // to prevent the case of iterating multiple times over large collections
    // we make sure to always iterate over the smaller one
    Set<GradoopId> iterate = this.ids;
    Set<GradoopId> contains = other.ids;
    int thisSize = this.size();
    int otherSize = other.size();
    if (thisSize > otherSize) {
      iterate = other.ids;
      contains = this.ids;
    }

    for (GradoopId id : iterate) {
      if (contains.contains(id)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Checks if any of the specified ids is contained in the set.
   *
   * @param other the ids to look for
   * @return true, iff any of the specified ids is contained in the set
   */
  public boolean containsAny(Set<GradoopId> other) {
    Set<GradoopId> iterate = this.ids;
    Set<GradoopId> contains = other;
    int thisSize = this.size();
    int otherSize = other.size();
    if (thisSize > otherSize) {
      iterate = other;
      contains = this.ids;
    }

    for (GradoopId id : iterate) {
      if (contains.contains(id)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Checks if the set is empty.
   *
   * @return true, iff the set contains no elements
   */
  public boolean isEmpty() {
    return ids.isEmpty();
  }

  @Override
  public Iterator<GradoopId> iterator() {
    return ids.iterator();
  }

  /**
   * Clears the set.
   */
  public void clear() {
    ids.clear();
  }

  /**
   * Returns the number of contained gradoop ids
   *
   * @return number of elements in the set
   */
  public int size() {
    return ids.size();
  }

  /**
   * Returns the byte representation of that set.
   *
   * @return byte array representation
   */
  public byte[] toByteArray() {
    return writeIds(ids);
  }

  @Override
  public void write(DataOutputView out) throws IOException {
    if (isEmpty()) {
      out.writeInt(0);
    } else {
      out.writeInt(size());
      out.write(writeIds(ids));
    }
  }

  @Override
  public void read(DataInputView in) throws IOException {
    int n = in.readInt();
    byte[] bytes = new byte[n * GradoopId.ID_SIZE];
    in.readFully(bytes);
    this.ids = readIds(bytes);
  }

  @Override
  public boolean equals(Object o) {
    boolean equal = this == o;

    if (!equal && o instanceof GradoopIdSet) {
      GradoopIdSet that = (GradoopIdSet) o;
      // same number of ids
      equal = this.size() == that.size();

      if (equal) {
        // same ids
        equal = this.ids.equals(that.ids);
      }
    }

    return equal;
  }

  @Override
  public int hashCode() {
    return ids.hashCode();
  }

  @Override
  public String toString() {
    if (isEmpty()) {
      return "[]";
    }

    Iterator<GradoopId> it = iterator();
    StringBuilder sb = new StringBuilder();
    sb.append('[');

    for (;;) {
      GradoopId id = it.next();
      sb.append(id);
      if (!it.hasNext()) {
        return sb.append(']').toString();
      }
      sb.append(',').append(' ');
    }
  }
}