From 5091e5752f8000afe3e116e5114d15eb61a4f9ef Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Wed, 22 Jan 2025 07:14:29 -0800 Subject: [PATCH] ORC: Fail when initial default support is required. (#12026) --- .../org/apache/iceberg/orc/ORCSchemaUtil.java | 28 +++++++++++------- .../iceberg/orc/TestBuildOrcProjection.java | 29 ++++++++++++++++++- 2 files changed, 46 insertions(+), 11 deletions(-) diff --git a/orc/src/main/java/org/apache/iceberg/orc/ORCSchemaUtil.java b/orc/src/main/java/org/apache/iceberg/orc/ORCSchemaUtil.java index 89a1632d5a..0a57c2f9ff 100644 --- a/orc/src/main/java/org/apache/iceberg/orc/ORCSchemaUtil.java +++ b/orc/src/main/java/org/apache/iceberg/orc/ORCSchemaUtil.java @@ -19,7 +19,6 @@ package org.apache.iceberg.orc; import java.util.List; -import java.util.Locale; import java.util.Map; import java.util.Objects; import java.util.Optional; @@ -264,11 +263,11 @@ public final class ORCSchemaUtil { public static TypeDescription buildOrcProjection( Schema schema, TypeDescription originalOrcSchema) { final Map icebergToOrc = icebergToOrcMapping("root", originalOrcSchema); - return buildOrcProjection(Integer.MIN_VALUE, schema.asStruct(), true, icebergToOrc); + return buildOrcProjection(schema, Integer.MIN_VALUE, schema.asStruct(), true, icebergToOrc); } private static TypeDescription buildOrcProjection( - Integer fieldId, Type type, boolean isRequired, Map mapping) { + Schema root, Integer fieldId, Type type, boolean isRequired, Map mapping) { final TypeDescription orcType; switch (type.typeId()) { @@ -284,6 +283,7 @@ public final class ORCSchemaUtil { .orElseGet(() -> nestedField.name() + "_r" + nestedField.fieldId()); TypeDescription childType = buildOrcProjection( + root, nestedField.fieldId(), nestedField.type(), isRequired && nestedField.isRequired(), @@ -295,6 +295,7 @@ public final class ORCSchemaUtil { Types.ListType list = (Types.ListType) type; TypeDescription elementType = buildOrcProjection( + root, list.elementId(), list.elementType(), isRequired && list.isElementRequired(), @@ -304,10 +305,10 @@ public final class ORCSchemaUtil { case MAP: Types.MapType map = (Types.MapType) type; TypeDescription keyType = - buildOrcProjection(map.keyId(), map.keyType(), isRequired, mapping); + buildOrcProjection(root, map.keyId(), map.keyType(), isRequired, mapping); TypeDescription valueType = buildOrcProjection( - map.valueId(), map.valueType(), isRequired && map.isValueRequired(), mapping); + root, map.valueId(), map.valueType(), isRequired && map.isValueRequired(), mapping); orcType = TypeDescription.createMap(keyType, valueType); break; default: @@ -326,13 +327,20 @@ public final class ORCSchemaUtil { orcType = originalType.clone(); } } else { + Types.NestedField field = root.findField(fieldId); if (isRequired) { - throw new IllegalArgumentException( + Preconditions.checkArgument( + field.initialDefault() != null, + "Missing required field: %s (%s)", + root.findColumnName(fieldId), + type); + } + + if (field.initialDefault() != null) { + throw new UnsupportedOperationException( String.format( - Locale.ROOT, - "Field %d of type %s is required and was not found.", - fieldId, - type)); + "ORC cannot read default value for field %s (%s): %s", + root.findColumnName(fieldId), type, field.initialDefault())); } orcType = convert(fieldId, type, false); diff --git a/orc/src/test/java/org/apache/iceberg/orc/TestBuildOrcProjection.java b/orc/src/test/java/org/apache/iceberg/orc/TestBuildOrcProjection.java index a179cb2f64..2d35f58613 100644 --- a/orc/src/test/java/org/apache/iceberg/orc/TestBuildOrcProjection.java +++ b/orc/src/test/java/org/apache/iceberg/orc/TestBuildOrcProjection.java @@ -162,6 +162,33 @@ public class TestBuildOrcProjection { assertThatThrownBy(() -> ORCSchemaUtil.buildOrcProjection(evolvedSchema, baseOrcSchema)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Field 4 of type long is required and was not found."); + .hasMessage("Missing required field: b.d (long)"); + } + + @Test + public void testRequiredNestedFieldWithDefaultMissingInFile() { + Schema baseSchema = + new Schema( + required(1, "a", Types.IntegerType.get()), + required(2, "b", Types.StructType.of(required(3, "c", Types.LongType.get())))); + TypeDescription baseOrcSchema = ORCSchemaUtil.convert(baseSchema); + + Schema evolvedSchema = + new Schema( + required(1, "a", Types.IntegerType.get()), + required( + 2, + "b", + Types.StructType.of( + required(3, "c", Types.LongType.get()), + Types.NestedField.required("d") + .withId(4) + .ofType(Types.LongType.get()) + .withInitialDefault(34L) + .build()))); + + assertThatThrownBy(() -> ORCSchemaUtil.buildOrcProjection(evolvedSchema, baseOrcSchema)) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessage("ORC cannot read default value for field b.d (long): 34"); } }