Skip to content

Commit

Permalink
fixup! Add table statistics update
Browse files Browse the repository at this point in the history
  • Loading branch information
ndrluis committed Nov 12, 2024
1 parent d3aaab3 commit 11120bf
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 6 deletions.
5 changes: 3 additions & 2 deletions mkdocs/docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -1140,15 +1140,16 @@ table.update_statistics().set_statistics(snapshot_id, statistics_file).commit()
table.update_statistics()
.set_statistics(snapshot_id1, statistics_file1)
.remove_statistics(snapshot_id2)
.commit()
# Operations are applied on commit.
```

You can also use context managers to make more changes:

```python
with table.update_statistics() as update:
update.set_statistics(1, statistics_file)
update.remove_statistics(2)
update.set_statistics(snapshot_id1, statistics_file)
update.remove_statistics(snapshot_id2)
```

## Query the data
Expand Down
2 changes: 1 addition & 1 deletion pyiceberg/table/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class StatisticsFile(IcebergBaseModel):
blob_metadata: List[BlobMetadata] = Field(alias="blob-metadata")


def reject_statistics(
def filter_statistics_by_snapshot_id(
statistics: List[StatisticsFile],
reject_snapshot_id: int,
) -> List[StatisticsFile]:
Expand Down
6 changes: 3 additions & 3 deletions pyiceberg/table/update/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
SnapshotLogEntry,
)
from pyiceberg.table.sorting import SortOrder
from pyiceberg.table.statistics import StatisticsFile, reject_statistics
from pyiceberg.table.statistics import StatisticsFile, filter_statistics_by_snapshot_id
from pyiceberg.typedef import (
IcebergBaseModel,
Properties,
Expand Down Expand Up @@ -496,7 +496,7 @@ def _(update: SetStatisticsUpdate, base_metadata: TableMetadata, context: _Table
if update.snapshot_id != update.statistics.snapshot_id:
raise ValueError("Snapshot id in statistics does not match the snapshot id in the update")

statistics = reject_statistics(base_metadata.statistics, update.snapshot_id)
statistics = filter_statistics_by_snapshot_id(base_metadata.statistics, update.snapshot_id)
context.add_update(update)

return base_metadata.model_copy(update={"statistics": statistics + [update.statistics]})
Expand All @@ -507,7 +507,7 @@ def _(update: RemoveStatisticsUpdate, base_metadata: TableMetadata, context: _Ta
if not any(stat.snapshot_id == update.snapshot_id for stat in base_metadata.statistics):
raise ValueError(f"Statistics with snapshot id {update.snapshot_id} does not exist")

statistics = reject_statistics(base_metadata.statistics, update.snapshot_id)
statistics = filter_statistics_by_snapshot_id(base_metadata.statistics, update.snapshot_id)
context.add_update(update)

return base_metadata.model_copy(update={"statistics": statistics})
Expand Down

0 comments on commit 11120bf

Please sign in to comment.