Skip to content

Commit

Permalink
Add support for v3 format in Iceberg
Browse files Browse the repository at this point in the history
  • Loading branch information
ebyhr committed Dec 12, 2024
1 parent 4718011 commit 62274e9
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 15 deletions.
4 changes: 2 additions & 2 deletions docs/src/main/sphinx/connector/iceberg.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
Apache Iceberg is an open table format for huge analytic datasets. The Iceberg
connector allows querying data stored in files written in Iceberg format, as
defined in the [Iceberg Table Spec](https://iceberg.apache.org/spec/). The
connector supports Apache Iceberg table spec versions 1 and 2.
connector supports Apache Iceberg table spec versions 1, 2 and 3.

The table state is maintained in metadata files. All changes to table
state create a new metadata file and replace the old metadata with an atomic
Expand Down Expand Up @@ -861,7 +861,7 @@ connector using a {doc}`WITH </sql/create-table-as>` clause.
- Optionally specifies the file system location URI for the table.
* - `format_version`
- Optionally specifies the format version of the Iceberg specification to use
for new tables; either `1` or `2`. Defaults to `2`. Version `2` is required
for new tables; either `1`, `2` or `3`. Defaults to `2`. Version `2` or later is
required for row level deletes.
* - `orc_bloom_filter_columns`
- Comma-separated list of columns to use for ORC bloom filter. It improves the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@
public class IcebergConfig
{
public static final int FORMAT_VERSION_SUPPORT_MIN = 1;
public static final int FORMAT_VERSION_SUPPORT_MAX = 2;
private static final int FORMAT_VERSION_DEFAULT = 2;
public static final int FORMAT_VERSION_SUPPORT_MAX = 3;
public static final String EXTENDED_STATISTICS_CONFIG = "iceberg.extended-statistics.enabled";
public static final String EXTENDED_STATISTICS_DESCRIPTION = "Enable collection (ANALYZE) and use of extended statistics.";
public static final String COLLECT_EXTENDED_STATISTICS_ON_WRITE_DESCRIPTION = "Collect extended statistics during writes";
Expand All @@ -72,7 +73,7 @@ public class IcebergConfig
private boolean registerTableProcedureEnabled;
private boolean addFilesProcedureEnabled;
private Optional<String> hiveCatalogName = Optional.empty();
private int formatVersion = FORMAT_VERSION_SUPPORT_MAX;
private int formatVersion = FORMAT_VERSION_DEFAULT;
private Duration expireSnapshotsMinRetention = new Duration(7, DAYS);
private Duration removeOrphanFilesMinRetention = new Duration(7, DAYS);
private DataSize targetMaxFileSize = DataSize.of(1, GIGABYTE);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -374,8 +374,8 @@ public class IcebergMetadata
{
private static final Logger log = Logger.get(IcebergMetadata.class);
private static final Pattern PATH_PATTERN = Pattern.compile("(.*)/[^/]+");
private static final int OPTIMIZE_MAX_SUPPORTED_TABLE_VERSION = 2;
private static final int CLEANING_UP_PROCEDURES_MAX_SUPPORTED_TABLE_VERSION = 2;
private static final int OPTIMIZE_MAX_SUPPORTED_TABLE_VERSION = 3;
private static final int CLEANING_UP_PROCEDURES_MAX_SUPPORTED_TABLE_VERSION = 3;
private static final String RETENTION_THRESHOLD = "retention_threshold";
private static final String UNKNOWN_SNAPSHOT_TOKEN = "UNKNOWN";
public static final Set<String> UPDATABLE_TABLE_PROPERTIES = ImmutableSet.<String>builder()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ protected IcebergQueryRunner.Builder createQueryRunnerBuilder()
{
return IcebergQueryRunner.builder()
.setIcebergProperties(ImmutableMap.<String, String>builder()
.put("iceberg.format-version", "3")
.put("iceberg.file-format", format.name())
// Only allow some extra properties. Add "sorted_by" so that we can test that the property is disallowed by the connector explicitly.
.put("iceberg.allowed-extra-properties", "extra.property.one,extra.property.two,extra.property.three,sorted_by")
Expand Down Expand Up @@ -375,7 +376,7 @@ public void testShowCreateTable()
")\n" +
"WITH (\n" +
" format = '" + format.name() + "',\n" +
" format_version = 2,\n" +
" format_version = 3,\n" +
" location = '\\E.*/tpch/orders-.*\\Q'\n" +
")\\E");
}
Expand Down Expand Up @@ -1260,7 +1261,7 @@ public void testCreatePartitionedTableAs()
assertUpdate(
"CREATE TABLE test_create_partitioned_table_as " +
"WITH (" +
"format_version = 2," +
"format_version = 3," +
"location = '" + tempDirPath + "', " +
"partitioning = ARRAY['ORDER_STATUS', 'Ship_Priority', 'Bucket(\"order key\",9)']" +
") " +
Expand All @@ -1277,7 +1278,7 @@ public void testCreatePartitionedTableAs()
")\n" +
"WITH (\n" +
" format = '%s',\n" +
" format_version = 2,\n" +
" format_version = 3,\n" +
" location = '%s',\n" +
" partitioning = ARRAY['order_status','ship_priority','bucket(\"order key\", 9)']\n" +
")",
Expand Down Expand Up @@ -1645,7 +1646,7 @@ public void testTableComments()
"COMMENT '%s'\n" +
"WITH (\n" +
format(" format = '%s',\n", format) +
" format_version = 2,\n" +
" format_version = 3,\n" +
format(" location = '%s'\n", tempDirPath) +
")";
String createTableWithoutComment = "" +
Expand All @@ -1654,7 +1655,7 @@ public void testTableComments()
")\n" +
"WITH (\n" +
" format = '" + format + "',\n" +
" format_version = 2,\n" +
" format_version = 3,\n" +
" location = '" + tempDirPath + "'\n" +
")";
String createTableSql = format(createTableTemplate, "test table comment", format);
Expand Down Expand Up @@ -1935,7 +1936,7 @@ private void testCreateTableLikeForFormat(IcebergFileFormat otherFormat)
"""
WITH (
format = '%s',
format_version = 2,
format_version = 3,
location = '%s',
partitioning = ARRAY['adate']
)""",
Expand All @@ -1951,7 +1952,7 @@ private void testCreateTableLikeForFormat(IcebergFileFormat otherFormat)
"""
WITH (
format = '%s',
format_version = 2,
format_version = 3,
location = '%s'
)""",
format,
Expand All @@ -1962,7 +1963,7 @@ private void testCreateTableLikeForFormat(IcebergFileFormat otherFormat)
"""
WITH (
format = '%s',
format_version = 2,
format_version = 3,
location = '%s'
)""",
format,
Expand Down Expand Up @@ -7407,7 +7408,7 @@ public void testSnapshotSummariesHaveTrinoQueryIdFormatV2()
assertUpdate(format("INSERT INTO %s VALUES (1, 1), (1, 4), (1, 20), (2, 2)", sourceTableName), 4);

// Create table with CTAS
assertQueryIdAndUserStored(tableName, executeWithQueryId(format("CREATE TABLE %s WITH (format_version = 2, partitioning = ARRAY['a']) " +
assertQueryIdAndUserStored(tableName, executeWithQueryId(format("CREATE TABLE %s WITH (format_version = 3, partitioning = ARRAY['a']) " +
"AS SELECT * FROM %s", tableName, sourceTableName)));

// Insert records
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
connector.name=iceberg
hive.metastore.uri=thrift://hadoop-master:9083
iceberg.format-version=3
iceberg.register-table-procedure.enabled=true
iceberg.allowed-extra-properties=*
fs.hadoop.enabled=true

0 comments on commit 62274e9

Please sign in to comment.