GradoopParquetTypeBuilder.java
/*
* Copyright © 2014 - 2021 Leipzig University (Database Research Group)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.gradoop.flink.io.impl.parquet.plain.common;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Types;
import org.gradoop.common.model.impl.id.GradoopId;
/**
 * Fluent helper that assembles a parquet {@link MessageType} from the field kinds that are
 * common to EPGM/TPGM records.
 */
public class GradoopParquetTypeBuilder {

  /**
   * Underlying parquet builder which collects every field added through this class.
   */
  private final Types.GroupBuilder<MessageType> messageTypeBuilder = Types.buildMessage();

  /**
   * Appends a field holding a single gradoop id: a required fixed-length byte array of
   * {@link GradoopId#ID_SIZE} bytes.
   *
   * @param label record field label
   * @return the builder instance
   */
  public GradoopParquetTypeBuilder addGradoopIdField(String label) {
    PrimitiveType idField = Types
      .required(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
      .length(GradoopId.ID_SIZE)
      .named(label);

    messageTypeBuilder.addField(idField);
    return this;
  }

  /**
   * Appends a string field: a required binary value carrying the string logical type.
   *
   * @param label record field label
   * @return the builder instance
   */
  public GradoopParquetTypeBuilder addStringField(String label) {
    PrimitiveType stringField = Types
      .required(PrimitiveType.PrimitiveTypeName.BINARY)
      .as(LogicalTypeAnnotation.stringType())
      .named(label);

    messageTypeBuilder.addField(stringField);
    return this;
  }

  /**
   * Appends a properties field: an optional map whose keys are string-annotated binaries and
   * whose values are required, un-annotated binaries.
   *
   * @param label record field label
   * @return the builder instance
   */
  public GradoopParquetTypeBuilder addPropertiesField(String label) {
    // keys and values share the same physical type, only the key gets the string annotation
    final PrimitiveType.PrimitiveTypeName binary = PrimitiveType.PrimitiveTypeName.BINARY;

    messageTypeBuilder.addField(
      Types.optionalMap()
        .key(binary).as(LogicalTypeAnnotation.stringType())
        .requiredValue(binary)
        .named(label));
    return this;
  }

  /**
   * Appends a gradoop id set field: a required list whose required elements are fixed-length
   * byte arrays of {@link GradoopId#ID_SIZE} bytes.
   *
   * @param label record field label
   * @return the builder instance
   */
  public GradoopParquetTypeBuilder addGradoopIdSetField(String label) {
    final PrimitiveType.PrimitiveTypeName idBytes =
      PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY;

    messageTypeBuilder.addField(
      Types.requiredList()
        .requiredElement(idBytes).length(GradoopId.ID_SIZE)
        .named(label));
    return this;
  }

  /**
   * Appends a time interval ({@code Tuple2<Long, Long>}) field: a required group consisting of
   * the two required timestamp fields {@code from} and {@code to}.
   *
   * @param label record field label
   * @return the builder instance
   */
  public GradoopParquetTypeBuilder addTimeInterval(String label) {
    PrimitiveType lowerBound = timestampField("from");
    PrimitiveType upperBound = timestampField("to");

    messageTypeBuilder.addField(
      Types.requiredGroup()
        .addFields(lowerBound, upperBound)
        .named(label));
    return this;
  }

  /**
   * Creates the message type from all fields added so far.
   *
   * @param name message type's name
   * @return configured message type
   */
  public MessageType build(String name) {
    return messageTypeBuilder.named(name);
  }

  /**
   * Creates a required INT64 field annotated as a UTC-adjusted timestamp with millisecond
   * precision.
   *
   * @param fieldName name of the timestamp field
   * @return the parquet type of the timestamp field
   */
  private static PrimitiveType timestampField(String fieldName) {
    return Types
      .required(PrimitiveType.PrimitiveTypeName.INT64)
      .as(LogicalTypeAnnotation.timestampType(true, LogicalTypeAnnotation.TimeUnit.MILLIS))
      .named(fieldName);
  }
}