Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
77 commits
Select commit Hold shift + click to select a range
2119555
feat(optimizer): add data model — schema, entities, DTOs, converters
mkuchenbecker Apr 3, 2026
3c93d52
fix: address PR review feedback on optimizer data model
mkuchenbecker Apr 3, 2026
d419eb3
feat(optimizer): add repositories and repository tests
mkuchenbecker Apr 6, 2026
7ff3b43
fix: consolidate repo methods — single find with optional filters
mkuchenbecker Apr 6, 2026
ac1da01
feat(optimizer): add apps/optimizer shared module with find-only repos
mkuchenbecker Apr 6, 2026
02a5ab3
fix: remove orphan fields from CompleteOperationRequest
mkuchenbecker Apr 6, 2026
5c78c8f
Merge mkuchenb/optimizer-0 into optimizer-1
mkuchenbecker Apr 6, 2026
1cbe556
Merge branch 'main' into mkuchenb/optimizer-0
mkuchenbecker Apr 30, 2026
231e1a1
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker Apr 30, 2026
f82d1b3
fix(optimizer): address PR #527 review feedback
mkuchenbecker May 1, 2026
e907a31
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 1, 2026
a109f02
fix(optimizer): propagate optimizer-0 renames into repos and tests
mkuchenbecker May 1, 2026
027fccd
fix(optimizer): add databaseName + tableName to apps/optimizer histor…
mkuchenbecker May 1, 2026
79753f1
fix(optimizer): index table_operations_history on (database_name, tab…
mkuchenbecker May 1, 2026
ae610ae
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 1, 2026
bf04488
fix(optimizer): align apps/optimizer entities with services schema
mkuchenbecker May 12, 2026
62f426a
feat(optimizer): add findLatestPerTable to history repo
mkuchenbecker May 12, 2026
3483b25
perf(optimizer): index table_operations_history for findLatestPerTable
mkuchenbecker May 13, 2026
0293009
feat(optimizer): add findDistinctDatabaseNames to TableStatsRepository
mkuchenbecker May 13, 2026
eba1392
feat(optimizer): promote internal model types to shared apps/optimizer
mkuchenbecker May 14, 2026
e576593
refactor(optimizer): rename apps/optimizer entities + repos to plural…
mkuchenbecker May 14, 2026
d90c26f
refactor(optimizer): move apps/optimizer module into services/optimizer
mkuchenbecker May 14, 2026
9a129a8
refactor(optimizer): align data model — rename HistoryStatus; String …
mkuchenbecker May 14, 2026
a8978a0
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
dfb9102
refactor(optimizer): realign entity shapes with optimizer-0
mkuchenbecker May 14, 2026
681407e
feat(optimizer): add internal model layer
mkuchenbecker May 14, 2026
2005bca
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
e3fb777
perf(optimizer): index table_operations_history for findLatestPerTable
mkuchenbecker May 14, 2026
f89889d
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
d3e1726
refactor(optimizer): enforce layer boundaries in api/ + model/
mkuchenbecker May 14, 2026
db9513a
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
1d469a7
refactor(optimizer): remove db-layer types from optimizer-0
mkuchenbecker May 14, 2026
eee8eca
refactor(optimizer): remove DB schema + schema-init properties
mkuchenbecker May 14, 2026
0567753
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
328e5b9
refactor(optimizer): scrub MySQL / JPA / datasource references
mkuchenbecker May 14, 2026
f7a5d20
refactor(optimizer): drop UpsertTableOperationsRequest
mkuchenbecker May 14, 2026
2a532b5
refactor(optimizer): drop JobResult from the wire and internal model
mkuchenbecker May 14, 2026
2e3a231
feat(optimizer): add debug echo fields to CompleteOperationRequest
mkuchenbecker May 14, 2026
db5eb29
refactor(optimizer): move application.properties out of optimizer-0
mkuchenbecker May 14, 2026
bbcf84a
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
ac3abc0
feat(optimizer): introduce db/ layer with per-layer types
mkuchenbecker May 14, 2026
e79eec7
refactor(optimizer): split TableStats envelope into snapshot + delta …
mkuchenbecker May 14, 2026
f955ded
fix(optimizer): drop CommitDeltaMetrics from TableStatsRow
mkuchenbecker May 14, 2026
861b584
feat(optimizer): extend model layer for service-only types
mkuchenbecker May 14, 2026
41d4c6d
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
b60a3bf
feat(optimizer): extend ModelDbMapper for service-only types
mkuchenbecker May 14, 2026
25d98aa
feat(optimizer): restore batch CAS methods on TableOperationsRepository
mkuchenbecker May 14, 2026
188713d
docs(optimizer): comment every field on opt-0 api/ and model/ types
mkuchenbecker May 14, 2026
f060b5e
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
8d64273
refactor(optimizer): remove clusterId from SnapshotMetrics
mkuchenbecker May 14, 2026
ee7bcab
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
c1ad246
refactor(optimizer): comment every db/ field; drop clusterId and version
mkuchenbecker May 14, 2026
c72aae8
refactor(optimizer): move api↔model conversion onto api types; delete…
mkuchenbecker May 14, 2026
1fca287
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
8ae8777
refactor(optimizer): move model↔db conversion onto model types; delet…
mkuchenbecker May 14, 2026
af23d5e
fix(optimizer): make TableStats self-describing; route DTO conversion…
mkuchenbecker May 15, 2026
3864e42
chore(optimizer): cascade self-describing TableStats from opt-0 to opt-1
mkuchenbecker May 15, 2026
a6045b5
feat(optimizer): add TableStats↔TableStatsRow conversion on model
mkuchenbecker May 15, 2026
3aebf64
chore(optimizer): enable toBuilder on model.Table and model.TableOper…
mkuchenbecker May 15, 2026
bf30f86
chore(optimizer): cascade toBuilder annotations from opt-0 to opt-1
mkuchenbecker May 15, 2026
b6c7f42
refactor(optimizer): drop fileCount enrichment from model.TableOperation
mkuchenbecker May 18, 2026
177af95
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 18, 2026
2b06c92
feat(repo): add findClaimedIds for transactional batch-claim verifica…
mkuchenbecker May 18, 2026
437a0ed
refactor(optimizer): add Dto suffix to all api/model classes (PR #527…
mkuchenbecker May 19, 2026
aabb51c
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 19, 2026
4f98c22
refactor(optimizer): rename api.model package to api.spec (PR #527 re…
mkuchenbecker May 19, 2026
2c26872
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 19, 2026
b31decf
refactor(optimizer): move Dto suffix from api/spec to model
mkuchenbecker May 20, 2026
caf3294
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 20, 2026
4e86569
feat(optimizer): propagate jobId through model + api conversions
mkuchenbecker May 20, 2026
cc8aa80
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 20, 2026
efcceea
feat(optimizer): propagate jobId through model ↔ db conversions
mkuchenbecker May 20, 2026
1fe71f0
refactor(optimizer): rename CompleteOperationRequest → UpdateOperatio…
mkuchenbecker May 20, 2026
fb5e726
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 20, 2026
b96c388
Merge remote-tracking branch 'linkedin/main' into mkuchenb/optimizer-1
mkuchenbecker May 20, 2026
d65b511
refactor(optimizer-repo): unify find/updateBatch with Optional params
mkuchenbecker May 21, 2026
b69e09a
test(optimizer-repo): truncate Instant to micros for CI precision
mkuchenbecker May 21, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions services/optimizer/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@ plugins {
}

// Dependencies for the optimizer service: JPA persistence, JSON column types, web, and test tooling.
dependencies {
// Spring Data JPA starter; pinned to 2.7.8 like the other Spring Boot starters in this block.
implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8'
// Presumably the source of the JsonStringType used by the db-layer entities for JSON columns.
implementation 'com.vladmihalcea:hibernate-types-55:2.21.1'
implementation 'org.springframework.boot:spring-boot-starter-web:2.7.8'
// NOTE(review): "8.+" is a dynamic version, so the resolved driver can change between builds.
// Consider pinning an exact version (or locking dependencies) for reproducible builds.
implementation 'mysql:mysql-connector-java:8.+'
// H2 is on the test classpath only; it is not available to production code.
testImplementation 'com.h2database:h2:2.2.224'
testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8'
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package com.linkedin.openhouse.optimizer.db;

import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Incremental counters describing what a single commit changed.
 *
 * <p>Stored as a JSON document in the {@code delta} column. Properties present in the JSON but not
 * declared here are ignored when reading ({@code ignoreUnknown = true}).
 */
@JsonIgnoreProperties(ignoreUnknown = true)
@Data
@NoArgsConstructor
@AllArgsConstructor
@Builder(toBuilder = true)
public class CommitDeltaMetrics {

  /** Count of data files added to the table by this commit. */
  private Long numFilesAdded;

  /** Count of data files removed from the table by this commit. */
  private Long numFilesDeleted;

  /** Bytes added by this commit, in total. */
  private Long addedSizeBytes;

  /** Bytes removed by this commit, in total. */
  private Long deletedSizeBytes;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package com.linkedin.openhouse.optimizer.db;

/**
 * Terminal outcomes stored in the {@code status} column of {@code table_operations_history}.
 *
 * <p>This is a DB-layer type and deliberately depends on nothing from the api/ or model/ layers.
 */
public enum HistoryStatus {
  /** Outcome recorded when the operation's Spark job finished successfully. */
  SUCCESS,

  /** Outcome recorded when the operation's Spark job failed. */
  FAILED;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package com.linkedin.openhouse.optimizer.db;

/**
 * Lifecycle states stored in the {@code status} column of {@code table_operations}.
 *
 * <p>This is a DB-layer type and deliberately depends on nothing from the api/ or model/ layers.
 */
public enum OperationStatus {
  /** Row has been written by the analyzer and no scheduler has claimed it yet. */
  PENDING,

  /** Row has been claimed by the scheduler, which is launching a job; no jobId recorded yet. */
  SCHEDULING,

  /** Job was submitted to the Jobs Service, and the row now holds a {@code jobId}. */
  SCHEDULED,

  /** Row was marked by the scheduler as a duplicate of another PENDING row; cannot be claimed. */
  CANCELED;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package com.linkedin.openhouse.optimizer.db;

/**
* DB-layer enum for the operation types persisted in {@code table_operations.operation_type} and
* {@code table_operations_history.operation_type}.
*
* <p>Self-contained: no references to api/ or model/ types. JPA binds this via
* {@code @Enumerated(EnumType.STRING)}, so the constant name itself is the stored value; renaming
* a constant changes what is written to (and expected in) those columns. Both columns are declared
* with {@code length = 50}, so constant names must fit within 50 characters.
*/
public enum OperationType {
Comment thread
mkuchenbecker marked this conversation as resolved.

/** Removes orphaned data files no longer referenced by table metadata. */
ORPHAN_FILES_DELETION
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package com.linkedin.openhouse.optimizer.db;

import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * State of a table captured at one point in time.
 *
 * <p>Stored as a JSON document in the {@code snapshot} column. Properties present in the JSON but
 * not declared here are ignored when reading ({@code ignoreUnknown = true}).
 */
@JsonIgnoreProperties(ignoreUnknown = true)
@Data
@NoArgsConstructor
@AllArgsConstructor
@Builder(toBuilder = true)
public class SnapshotMetrics {

  /** Pointer to the Iceberg metadata version this snapshot corresponds to. */
  private String tableVersion;

  /** Storage root of the table, as a filesystem path or URI. */
  private String tableLocation;

  /** On-disk size of the whole table at this snapshot, measured in bytes. */
  private Long tableSizeBytes;

  /** Data-file count as of the latest snapshot; consumed by bin-packing. */
  private Long numCurrentFiles;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package com.linkedin.openhouse.optimizer.db;

import java.time.Instant;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.EnumType;
import javax.persistence.Enumerated;
import javax.persistence.Id;
import javax.persistence.Index;
import javax.persistence.Table;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.NoArgsConstructor;

/**
 * History entry for one finished maintenance operation; rows are only ever appended.
 *
 * <p>A row is written when the operation-complete endpoint is called. Its {@code id} reuses the
 * UUID of the live-operations row it originated from, which links the history entry to the
 * operation cycle that produced it. Running the same operation on the same table several times
 * therefore yields several rows.
 *
 * <p>This is a self-contained DB-layer type: {@link OperationType} and {@link HistoryStatus} come
 * from this package and are persisted by name ({@code EnumType.STRING}). Only getters are
 * generated; instances are created through the builder.
 */
@Getter
@Builder(toBuilder = true)
@EqualsAndHashCode
@AllArgsConstructor(access = AccessLevel.PROTECTED)
@NoArgsConstructor(access = AccessLevel.PROTECTED)
@Entity
@Table(
    name = "table_operations_history",
    indexes = {
      @Index(name = "idx_table_uuid_hist", columnList = "table_uuid"),
      @Index(name = "idx_op_type_hist", columnList = "operation_type"),
      @Index(name = "idx_completed_at", columnList = "completed_at"),
      @Index(name = "idx_status_hist", columnList = "status"),
      @Index(name = "idx_toph_db_table", columnList = "database_name, table_name")
    })
public class TableOperationsHistoryRow {

  /** Primary key. Supplied by the caller: the UUID of the originating live-operations row. */
  @Id
  @Column(name = "id", length = 36, nullable = false)
  private String id;

  /** Table identity issued by the Tables Service; stable for the table. */
  @Column(name = "table_uuid", length = 36, nullable = false)
  private String tableUuid;

  /** Database name, denormalized onto the row. */
  @Column(name = "database_name", length = 128, nullable = false)
  private String databaseName;

  /** Table name, denormalized onto the row. */
  @Column(name = "table_name", length = 128, nullable = false)
  private String tableName;

  /** Which maintenance operation this history entry describes. */
  @Column(name = "operation_type", length = 50, nullable = false)
  @Enumerated(EnumType.STRING)
  private OperationType operationType;

  /** Completion time of the operation, as recorded by the complete endpoint. */
  @Column(name = "completed_at", nullable = false)
  private Instant completedAt;

  /** Final result: either {@link HistoryStatus#SUCCESS} or {@link HistoryStatus#FAILED}. */
  @Column(name = "status", length = 20, nullable = false)
  @Enumerated(EnumType.STRING)
  private HistoryStatus status;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
package com.linkedin.openhouse.optimizer.db;

import java.time.Instant;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.EnumType;
import javax.persistence.Enumerated;
import javax.persistence.Id;
import javax.persistence.Index;
import javax.persistence.Table;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.NoArgsConstructor;

/**
 * JPA entity holding one Analyzer recommendation to run a maintenance operation on a table.
 *
 * <p>The primary key {@code id} is a UUID generated by the client. The Analyzer inserts a new row
 * the first time it recommends an operation for a table, and again when it re-recommends after an
 * earlier row reached a terminal state. {@code table_uuid} is the table's stable identity: it
 * survives renames and rotates on drop+recreate. At most one active row (PENDING / SCHEDULING /
 * SCHEDULED) per {@code (table_uuid, operation_type)} is allowed at a time; that rule is enforced
 * by the application, not by a constraint declared on this entity.
 *
 * <p>This is a self-contained DB-layer type: {@link OperationType} and {@link OperationStatus}
 * come from this package and are persisted by name ({@code EnumType.STRING}). Only getters are
 * generated; instances are created through the builder.
 */
@Getter
@Builder(toBuilder = true)
@EqualsAndHashCode
@AllArgsConstructor(access = AccessLevel.PROTECTED)
@NoArgsConstructor(access = AccessLevel.PROTECTED)
@Entity
@Table(
    name = "table_operations",
    indexes = {
      @Index(name = "idx_table_uuid", columnList = "table_uuid"),
      @Index(name = "idx_op_type", columnList = "operation_type"),
      @Index(name = "idx_status", columnList = "status"),
      @Index(name = "idx_created_at", columnList = "created_at"),
      @Index(name = "idx_scheduled_at", columnList = "scheduled_at")
    })
public class TableOperationsRow {

  /** Primary key: client-generated UUID naming this particular recommendation. */
  @Id
  @Column(name = "id", length = 36, nullable = false)
  private String id;

  /** Table identity from the Tables Service; survives renames, rotates on drop+recreate. */
  @Column(name = "table_uuid", length = 36, nullable = false)
  private String tableUuid;

  /** Database name, denormalized onto the row. */
  @Column(name = "database_name", length = 128, nullable = false)
  private String databaseName;

  /** Table name, denormalized onto the row. */
  @Column(name = "table_name", length = 128, nullable = false)
  private String tableName;

  /** Which maintenance operation this row recommends. */
  @Column(name = "operation_type", length = 50, nullable = false)
  @Enumerated(EnumType.STRING)
  private OperationType operationType;

  /** Lifecycle state; used by the scheduler's CAS claim and the analyzer's eligibility check. */
  @Column(name = "status", length = 20, nullable = false)
  @Enumerated(EnumType.STRING)
  private OperationStatus status;

  /** Creation time, set by the analyzer on insert and never updated afterwards. */
  @Column(name = "created_at", nullable = false)
  private Instant createdAt;

  /** Time the scheduler last submitted a job for this row; {@code null} while {@code PENDING}. */
  @Column(name = "scheduled_at")
  private Instant scheduledAt;

  /**
   * Spark job ID written by the scheduler. Internal-only; never exposed on wire.
   *
   * <p>NOTE(review): {@link OperationStatus} documents the jobId as absent in SCHEDULING and
   * present in SCHEDULED; confirm exactly when this column is populated.
   */
  @Column(name = "job_id", length = 255)
  private String jobId;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package com.linkedin.openhouse.optimizer.db;

import com.vladmihalcea.hibernate.type.json.JsonStringType;
import java.time.Instant;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.Id;
import javax.persistence.Index;
import javax.persistence.Table;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.NoArgsConstructor;
import org.hibernate.annotations.Type;
import org.hibernate.annotations.TypeDef;

/**
 * Per-commit stats as reported by the Tables Service; rows are only ever appended.
 *
 * <p>One row is produced for each Iceberg commit, so consumers can query this table to
 * reconstruct change rates over any time window.
 *
 * <p>This is a self-contained DB-layer type. The stats payload lives in two JSON columns: {@link
 * SnapshotMetrics} holds the point-in-time fields at commit time, and {@link CommitDeltaMetrics}
 * holds the per-commit counters. Only getters are generated; instances are created through the
 * builder.
 */
@Getter
@Builder(toBuilder = true)
@EqualsAndHashCode
@AllArgsConstructor(access = AccessLevel.PROTECTED)
@NoArgsConstructor(access = AccessLevel.PROTECTED)
@Entity
@Table(
    name = "table_stats_history",
    indexes = {
      @Index(name = "idx_tsh_table_uuid", columnList = "table_uuid"),
      @Index(name = "idx_tsh_recorded_at", columnList = "recorded_at")
    })
@TypeDef(name = "json", typeClass = JsonStringType.class)
public class TableStatsHistoryRow {

  /** Primary key: a UUID supplied by the caller rather than generated server-side. */
  @Id
  @Column(name = "id", length = 36, nullable = false)
  private String id;

  /** UUID of the Iceberg table; stable for the table. */
  @Column(name = "table_uuid", length = 36, nullable = false)
  private String tableUuid;

  /** Database name, denormalized onto the row. */
  @Column(name = "database_name", length = 128, nullable = false)
  private String databaseName;

  /** Table name, denormalized onto the row. */
  @Column(name = "table_name", length = 128, nullable = false)
  private String tableName;

  /** Point-in-time fields at commit time, kept as a JSON blob in the {@code snapshot} column. */
  @Column(name = "snapshot", columnDefinition = "TEXT")
  @Type(type = "json")
  private SnapshotMetrics snapshot;

  /** Per-commit delta counters, kept as a JSON blob in the {@code delta} column. */
  @Column(name = "delta", columnDefinition = "TEXT")
  @Type(type = "json")
  private CommitDeltaMetrics delta;

  /** Time at which this history row was recorded (commit time). */
  @Column(name = "recorded_at", nullable = false)
  private Instant recordedAt;
}
Loading