diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java index 98bc5c0237..4c29817a96 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java @@ -192,18 +192,6 @@ protected LogicalTypeAnnotation fromString(List params) { protected abstract LogicalTypeAnnotation fromString(List params); } - /** - * Convert this logical type to old logical type representation in parquet-mr (if there's any). - * Those logical type implementations, which don't have a corresponding mapping should return null. - *

- * API should be considered private - * - * @return the OriginalType representation of the new logical type, or null if there's none - * @deprecated Please use the LogicalTypeAnnotation itself - */ - @Deprecated - public abstract OriginalType toOriginalType(); - /** * Visits this logical type with the given visitor * @@ -382,18 +370,6 @@ public static class StringLogicalTypeAnnotation extends LogicalTypeAnnotation { private StringLogicalTypeAnnotation() {} - /** - * API Should be considered private - * - * @return the original type - * @deprecated Please use the LogicalTypeAnnotation itself - */ - @Override - @Deprecated - public OriginalType toOriginalType() { - return OriginalType.UTF8; - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); @@ -426,18 +402,6 @@ public static class MapLogicalTypeAnnotation extends LogicalTypeAnnotation { private MapLogicalTypeAnnotation() {} - /** - * API Should be considered private - * - * @return the original type - * @deprecated Please use the LogicalTypeAnnotation itself - */ - @Override - @Deprecated - public OriginalType toOriginalType() { - return OriginalType.MAP; - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); @@ -465,18 +429,6 @@ public static class ListLogicalTypeAnnotation extends LogicalTypeAnnotation { private ListLogicalTypeAnnotation() {} - /** - * API Should be considered private - * - * @return the original type - * @deprecated Please use the LogicalTypeAnnotation itself - */ - @Override - @Deprecated - public OriginalType toOriginalType() { - return OriginalType.LIST; - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); @@ -504,18 +456,6 @@ public static class EnumLogicalTypeAnnotation extends LogicalTypeAnnotation { private EnumLogicalTypeAnnotation() {} - /** - * API Should be considered private - * - * @return the original type - * @deprecated Please use the LogicalTypeAnnotation itself - */ - @Override - @Deprecated - public OriginalType toOriginalType() { - return OriginalType.ENUM; - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); @@ -562,18 +502,6 @@ public int getScale() { return scale; } - /** - * API Should be considered private - * - * @return the original type - * @deprecated Please use the LogicalTypeAnnotation itself - */ - @Override - @Deprecated - public OriginalType toOriginalType() { - return OriginalType.DECIMAL; - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); @@ -614,18 +542,6 @@ public static class DateLogicalTypeAnnotation extends LogicalTypeAnnotation { private DateLogicalTypeAnnotation() {} - /** - * API Should be considered private - * - * @return the original type - * @deprecated Please use the LogicalTypeAnnotation itself - */ - @Override - @Deprecated - public OriginalType toOriginalType() { - return OriginalType.DATE; - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); @@ -668,25 +584,6 @@ private TimeLogicalTypeAnnotation(boolean isAdjustedToUTC, TimeUnit unit) { this.unit = unit; } - /** - * API Should be considered private - * - * @return the original type - * @deprecated Please use the LogicalTypeAnnotation itself - */ - @Override - @Deprecated - public OriginalType toOriginalType() { - switch (unit) { - case MILLIS: - return OriginalType.TIME_MILLIS; - case MICROS: - return OriginalType.TIME_MICROS; - default: - return null; - } - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); @@ -747,25 +644,6 @@ private TimestampLogicalTypeAnnotation(boolean isAdjustedToUTC, TimeUnit unit) { this.unit = unit; } - /** - * API Should be considered private - * - * @return the original type - * @deprecated Please use the LogicalTypeAnnotation itself - */ - @Override - @Deprecated - public OriginalType toOriginalType() { - switch (unit) { - case MILLIS: - return OriginalType.TIMESTAMP_MILLIS; - case MICROS: - return OriginalType.TIMESTAMP_MICROS; - default: - return null; - } - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); @@ -832,29 +710,6 @@ private IntLogicalTypeAnnotation(int bitWidth, boolean isSigned) { this.isSigned = isSigned; } - /** - * API Should be considered private - * - * @return the original type - * @deprecated Please use the LogicalTypeAnnotation itself - */ - @Override - @Deprecated - public OriginalType toOriginalType() { - switch (bitWidth) { - case 8: - return isSigned ? OriginalType.INT_8 : OriginalType.UINT_8; - case 16: - return isSigned ? OriginalType.INT_16 : OriginalType.UINT_16; - case 32: - return isSigned ? OriginalType.INT_32 : OriginalType.UINT_32; - case 64: - return isSigned ? OriginalType.INT_64 : OriginalType.UINT_64; - default: - return null; - } - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); @@ -903,18 +758,6 @@ public static class JsonLogicalTypeAnnotation extends LogicalTypeAnnotation { private JsonLogicalTypeAnnotation() {} - /** - * API Should be considered private - * - * @return the original type - * @deprecated Please use the LogicalTypeAnnotation itself - */ - @Override - @Deprecated - public OriginalType toOriginalType() { - return OriginalType.JSON; - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); @@ -947,18 +790,6 @@ public static class BsonLogicalTypeAnnotation extends LogicalTypeAnnotation { private BsonLogicalTypeAnnotation() {} - /** - * API Should be considered private - * - * @return the original type - * @deprecated Please use the LogicalTypeAnnotation itself - */ - @Override - @Deprecated - public OriginalType toOriginalType() { - return OriginalType.BSON; - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); @@ -992,19 +823,6 @@ public static class UUIDLogicalTypeAnnotation extends LogicalTypeAnnotation { private UUIDLogicalTypeAnnotation() {} - /** - * API Should be considered private - * - * @return the original type - * @deprecated Please use the LogicalTypeAnnotation itself - */ - @Override - @Deprecated - public OriginalType toOriginalType() { - // No OriginalType for UUID - return null; - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); @@ -1088,18 +906,6 @@ public static LogicalTypeAnnotation getInstance() { private IntervalLogicalTypeAnnotation() {} - /** - * API Should be considered private - * - * @return the original type - * @deprecated Please use the LogicalTypeAnnotation itself - */ - @Override - @Deprecated - public OriginalType toOriginalType() { - return OriginalType.INTERVAL; - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); @@ -1144,18 +950,6 @@ public static MapKeyValueTypeAnnotation getInstance() { private MapKeyValueTypeAnnotation() {} - /** - * API Should be considered private - * - * @return the original type - * @deprecated Please use the LogicalTypeAnnotation itself - */ - @Override - @Deprecated - public OriginalType toOriginalType() { - return OriginalType.MAP_KEY_VALUE; - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java index bd682f92fd..3665e1a4ed 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java @@ -226,13 +226,6 @@ public LogicalTypeAnnotation getLogicalTypeAnnotation() { return logicalTypeAnnotation; } - /** - * @return the original type (LIST, MAP, ...) - */ - public OriginalType getOriginalType() { - return logicalTypeAnnotation == null ? null : logicalTypeAnnotation.toOriginalType(); - } - /** * @return if this is a primitive type */ diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java index 2f12991ab0..5e8254ec87 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java @@ -326,10 +326,6 @@ public P named(String name) { throw new IllegalStateException("[BUG] Parent and return type are null: must override named"); } } - - protected OriginalType getOriginalType() { - return logicalTypeAnnotation == null ? null : logicalTypeAnnotation.toOriginalType(); - } } public abstract static class BasePrimitiveBuilder> @@ -436,6 +432,9 @@ protected PrimitiveType build(String name) { } DecimalMetadata meta = decimalMetadata(); + if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) { + this.logicalTypeAnnotation = LogicalTypeAnnotation.fromOriginalType(OriginalType.DECIMAL, meta); + } // validate type annotations and required metadata if (logicalTypeAnnotation != null) { @@ -631,13 +630,7 @@ private Optional checkInt64PrimitiveType( logicalTypeAnnotation + " can not be applied to a primitive type")); } - if (newLogicalTypeSet) { - return new PrimitiveType( - repetition, primitiveType, length, name, logicalTypeAnnotation, id, columnOrder); - } else { - return new PrimitiveType( - repetition, primitiveType, length, name, getOriginalType(), meta, id, columnOrder); - } + return new PrimitiveType(repetition, primitiveType, length, name, logicalTypeAnnotation, id, columnOrder); } private static long maxPrecision(int numBytes) { @@ -649,7 +642,6 @@ private static long maxPrecision(int numBytes) { } protected DecimalMetadata decimalMetadata() { - DecimalMetadata meta = null; if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) { LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalType = (LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) logicalTypeAnnotation; @@ -671,16 +663,14 @@ protected DecimalMetadata decimalMetadata() { scale = decimalType.getScale(); precision = decimalType.getPrecision(); } - Preconditions.checkArgument(precision > 0, "Invalid DECIMAL precision: %s", precision); - Preconditions.checkArgument(this.scale >= 0, "Invalid DECIMAL scale: %s", this.scale); - Preconditions.checkArgument( - this.scale <= precision, - "Invalid DECIMAL scale: %s cannot be greater than precision: %s", - this.scale, - precision); - meta = new DecimalMetadata(precision, scale); + Preconditions.checkArgument(precision > 0, + "Invalid DECIMAL precision: %s", precision); + Preconditions.checkArgument(this.scale >= 0, + "Invalid DECIMAL scale: %s", this.scale); + Preconditions.checkArgument(this.scale <= precision, + "Invalid DECIMAL scale: cannot be greater than precision"); } - return meta; + return new DecimalMetadata(precision, scale); } } @@ -820,11 +810,7 @@ public THIS addFields(Type... types) { @Override protected GroupType build(String name) { - if (newLogicalTypeSet) { - return new GroupType(repetition, name, logicalTypeAnnotation, fields, id); - } else { - return new GroupType(repetition, name, getOriginalType(), fields, id); - } + return new GroupType(repetition, name, logicalTypeAnnotation, fields, id); } public MapBuilder map(Type.Repetition repetition) { diff --git a/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java b/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java index 5172e788b5..ec3fffd25b 100644 --- a/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java +++ b/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java @@ -52,6 +52,9 @@ import static org.apache.parquet.schema.Type.Repetition.REPEATED; import static org.apache.parquet.schema.Type.Repetition.REQUIRED; import static org.apache.parquet.schema.Types.buildMessage; + +import org.apache.parquet.schema.LogicalTypeAnnotation; +import org.junit.Test; import static org.junit.Assert.assertEquals; import org.apache.parquet.schema.GroupType; @@ -255,7 +258,7 @@ public void testLISTAnnotation() { } @Test - public void testDecimalFixedAnnotation() { + public void testDecimalFixedAnnotationOriginalType() { String message = "message DecimalMessage {\n" + " required FIXED_LEN_BYTE_ARRAY(4) aDecimal (DECIMAL(9,2));\n" + "}\n"; @@ -275,8 +278,30 @@ public void testDecimalFixedAnnotation() { } @Test - public void testDecimalBinaryAnnotation() { - String message = "message DecimalMessage {\n" + " required binary aDecimal (DECIMAL(9,2));\n" + "}\n"; + public void testDecimalFixedAnnotation() { + String message = + "message DecimalMessage {\n" + + " required FIXED_LEN_BYTE_ARRAY(4) aDecimal (DECIMAL(9,2));\n" + + "}\n"; + + MessageType parsed = parseMessageType(message); + MessageType expected = buildMessage() + .required(FIXED_LEN_BYTE_ARRAY).length(4) + .as(LogicalTypeAnnotation.decimalType(2, 9)) + .named("aDecimal") + .named("DecimalMessage"); + + assertEquals(expected, parsed); + MessageType reparsed = parseMessageType(parsed.toString()); + assertEquals(expected, reparsed); + } + + @Test + public void testDecimalBinaryAnnotationOriginalType() { + String message = + "message DecimalMessage {\n" + + " required binary aDecimal (DECIMAL(9,2));\n" + + "}\n"; MessageType parsed = parseMessageType(message); MessageType expected = buildMessage() @@ -292,6 +317,24 @@ public void testDecimalBinaryAnnotation() { assertEquals(expected, reparsed); } + @Test + public void testDecimalBinaryAnnotation() { + String message = + "message DecimalMessage {\n" + + " required binary aDecimal (DECIMAL(9,2));\n" + + "}\n"; + + MessageType parsed = parseMessageType(message); + MessageType expected = buildMessage() + .required(BINARY).as(LogicalTypeAnnotation.decimalType(2, 9)) + .named("aDecimal") + .named("DecimalMessage"); + + assertEquals(expected, parsed); + MessageType reparsed = parseMessageType(parsed.toString()); + assertEquals(expected, reparsed); + } + @Test public void testTimeAnnotations() { String message = "message TimeMessage {" + " required int32 date (DATE);" diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java index 3597898c30..0a36d853d3 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java @@ -415,7 +415,7 @@ public Optional visit(LogicalTypeAnnotation.TimeLogicalTypeAnnota case NANOS: return empty(); default: - throw new RuntimeException("Unknown converted type for " + timeLogicalType.toOriginalType()); + throw new RuntimeException("Unknown converted type for " + timeLogicalType); } } @@ -430,7 +430,7 @@ public Optional visit( case NANOS: return empty(); default: - throw new RuntimeException("Unknown converted type for " + timestampLogicalType.toOriginalType()); + throw new RuntimeException("Unknown converted type for " + timestampLogicalType); } } @@ -447,7 +447,7 @@ public Optional visit(LogicalTypeAnnotation.IntLogicalTypeAnnotat case 64: return of(signed ? ConvertedType.INT_64 : ConvertedType.UINT_64); default: - throw new RuntimeException("Unknown original type " + intLogicalType.toOriginalType()); + throw new RuntimeException("Unknown original type " + intLogicalType); } } @@ -1263,6 +1263,20 @@ Type getType(PrimitiveTypeName type) { } } + private boolean getAdjustToUtc(SchemaElement schemaElement) { + if (schemaElement != null && schemaElement.logicalType != null) { + Object field = schemaElement.logicalType.getFieldValue(); + if (field instanceof TimeType) { + return ((TimeType)field).isAdjustedToUTC; + } + else if (field instanceof TimestampType) { + return ((TimestampType)field).isAdjustedToUTC; + } + } + + return true; + } + // Visible for testing LogicalTypeAnnotation getLogicalTypeAnnotation(ConvertedType type, SchemaElement schemaElement) { switch (type) { @@ -1283,13 +1297,13 @@ LogicalTypeAnnotation getLogicalTypeAnnotation(ConvertedType type, SchemaElement case DATE: return LogicalTypeAnnotation.dateType(); case TIME_MILLIS: - return LogicalTypeAnnotation.timeType(true, LogicalTypeAnnotation.TimeUnit.MILLIS); + return LogicalTypeAnnotation.timeType(getAdjustToUtc(schemaElement), LogicalTypeAnnotation.TimeUnit.MILLIS); case TIME_MICROS: - return LogicalTypeAnnotation.timeType(true, LogicalTypeAnnotation.TimeUnit.MICROS); + return LogicalTypeAnnotation.timeType(getAdjustToUtc(schemaElement), LogicalTypeAnnotation.TimeUnit.MICROS); case TIMESTAMP_MILLIS: - return LogicalTypeAnnotation.timestampType(true, LogicalTypeAnnotation.TimeUnit.MILLIS); + return LogicalTypeAnnotation.timestampType(getAdjustToUtc(schemaElement), LogicalTypeAnnotation.TimeUnit.MILLIS); case TIMESTAMP_MICROS: - return LogicalTypeAnnotation.timestampType(true, LogicalTypeAnnotation.TimeUnit.MICROS); + return LogicalTypeAnnotation.timestampType(getAdjustToUtc(schemaElement), LogicalTypeAnnotation.TimeUnit.MICROS); case INTERVAL: return LogicalTypeAnnotation.IntervalLogicalTypeAnnotation.getInstance(); case INT_8: @@ -2051,20 +2065,15 @@ private void buildChildren( childBuilder.as(getLogicalTypeAnnotation(schemaElement.logicalType)); } if (schemaElement.isSetConverted_type()) { - OriginalType originalType = getLogicalTypeAnnotation(schemaElement.converted_type, schemaElement) - .toOriginalType(); - OriginalType newOriginalType = (schemaElement.isSetLogicalType() - && getLogicalTypeAnnotation(schemaElement.logicalType) != null) - ? getLogicalTypeAnnotation(schemaElement.logicalType).toOriginalType() - : null; - if (!originalType.equals(newOriginalType)) { - if (newOriginalType != null) { - LOG.warn( - "Converted type and logical type metadata mismatch (convertedType: {}, logical type: {}). Using value in converted type.", - schemaElement.converted_type, - schemaElement.logicalType); + LogicalTypeAnnotation typeAnnotation = getLogicalTypeAnnotation(schemaElement.converted_type, schemaElement); + LogicalTypeAnnotation newTypeAnnotation = (schemaElement.isSetLogicalType() && getLogicalTypeAnnotation(schemaElement.logicalType) != null) ? + getLogicalTypeAnnotation(schemaElement.logicalType) : null; + if (!typeAnnotation.equals(newTypeAnnotation)) { + if (newTypeAnnotation != null) { + LOG.warn("Converted type and logical type metadata mismatch (convertedType: {}, logical type: {}). Using value in converted type.", + schemaElement.converted_type, schemaElement.logicalType); } - childBuilder.as(originalType); + childBuilder.as(typeAnnotation); } } if (schemaElement.isSetField_id()) { diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java index e4851d8ec4..21935629c6 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java @@ -498,16 +498,6 @@ public void testEnumEquivalence() { for (Type type : Type.values()) { assertEquals(type, parquetMetadataConverter.getType(parquetMetadataConverter.getPrimitive(type))); } - for (OriginalType original : OriginalType.values()) { - assertEquals( - original, - parquetMetadataConverter - .getLogicalTypeAnnotation( - parquetMetadataConverter.convertToConvertedType( - LogicalTypeAnnotation.fromOriginalType(original, null)), - null) - .toOriginalType()); - } for (ConvertedType converted : ConvertedType.values()) { assertEquals( converted, diff --git a/pom.xml b/pom.xml index b3504ce935..3105ba3db7 100644 --- a/pom.xml +++ b/pom.xml @@ -590,6 +590,31 @@ ${shade.prefix} + org.apache.parquet.hadoop.CodecFactory + org.apache.parquet.hadoop.ParquetReader + org.apache.parquet.thrift.projection.deprecated.PathGlobPattern + + org.apache.parquet.hadoop.ColumnChunkPageWriteStore + org.apache.parquet.hadoop.ParquetRecordWriter + + org.apache.parquet.conf.PlainParquetConfiguration#getClass(java.lang.String,java.lang.Class,java.lang.Class) + org.apache.parquet.conf.ParquetConfiguration#getClass(java.lang.String,java.lang.Class,java.lang.Class) + org.apache.parquet.hadoop.util.SerializationUtil#readObjectFromConfAsBase64(java.lang.String,org.apache.parquet.conf.ParquetConfiguration) + org.apache.parquet.conf.HadoopParquetConfiguration#getClass(java.lang.String,java.lang.Class,java.lang.Class) + org.apache.parquet.avro.AvroParquetReader#builder(org.apache.parquet.io.InputFile,org.apache.parquet.conf.ParquetConfiguration) + org.apache.parquet.hadoop.thrift.TBaseWriteSupport#setThriftClass(org.apache.parquet.conf.ParquetConfiguration,java.lang.Class) + org.apache.parquet.proto.ProtoParquetReader#builder(org.apache.hadoop.fs.Path,boolean) + org.apache.parquet.proto.ProtoParquetReader#builder(org.apache.parquet.io.InputFile,boolean) + + + org.apache.parquet.arrow.schema.SchemaMapping + + org.apache.parquet.schema.GroupType + org.apache.parquet.schema.LogicalTypeAnnotation + org.apache.parquet.schema.MessageType + org.apache.parquet.schema.PrimitiveType + org.apache.parquet.schema.Type + org.apache.parquet.schema.Types org.apache.parquet.internal.column.columnindex.IndexIterator