Skip to content
Merged
Show file tree
Hide file tree
Changes from 62 commits
Commits
Show all changes
77 commits
Select commit Hold shift + click to select a range
2119555
feat(optimizer): add data model — schema, entities, DTOs, converters
mkuchenbecker Apr 3, 2026
3c93d52
fix: address PR review feedback on optimizer data model
mkuchenbecker Apr 3, 2026
d419eb3
feat(optimizer): add repositories and repository tests
mkuchenbecker Apr 6, 2026
7ff3b43
fix: consolidate repo methods — single find with optional filters
mkuchenbecker Apr 6, 2026
ac1da01
feat(optimizer): add apps/optimizer shared module with find-only repos
mkuchenbecker Apr 6, 2026
02a5ab3
fix: remove orphan fields from CompleteOperationRequest
mkuchenbecker Apr 6, 2026
5c78c8f
Merge mkuchenb/optimizer-0 into optimizer-1
mkuchenbecker Apr 6, 2026
1cbe556
Merge branch 'main' into mkuchenb/optimizer-0
mkuchenbecker Apr 30, 2026
231e1a1
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker Apr 30, 2026
f82d1b3
fix(optimizer): address PR #527 review feedback
mkuchenbecker May 1, 2026
e907a31
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 1, 2026
a109f02
fix(optimizer): propagate optimizer-0 renames into repos and tests
mkuchenbecker May 1, 2026
027fccd
fix(optimizer): add databaseName + tableName to apps/optimizer histor…
mkuchenbecker May 1, 2026
79753f1
fix(optimizer): index table_operations_history on (database_name, tab…
mkuchenbecker May 1, 2026
ae610ae
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 1, 2026
bf04488
fix(optimizer): align apps/optimizer entities with services schema
mkuchenbecker May 12, 2026
62f426a
feat(optimizer): add findLatestPerTable to history repo
mkuchenbecker May 12, 2026
3483b25
perf(optimizer): index table_operations_history for findLatestPerTable
mkuchenbecker May 13, 2026
0293009
feat(optimizer): add findDistinctDatabaseNames to TableStatsRepository
mkuchenbecker May 13, 2026
eba1392
feat(optimizer): promote internal model types to shared apps/optimizer
mkuchenbecker May 14, 2026
e576593
refactor(optimizer): rename apps/optimizer entities + repos to plural…
mkuchenbecker May 14, 2026
d90c26f
refactor(optimizer): move apps/optimizer module into services/optimizer
mkuchenbecker May 14, 2026
9a129a8
refactor(optimizer): align data model — rename HistoryStatus; String …
mkuchenbecker May 14, 2026
a8978a0
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
dfb9102
refactor(optimizer): realign entity shapes with optimizer-0
mkuchenbecker May 14, 2026
681407e
feat(optimizer): add internal model layer
mkuchenbecker May 14, 2026
2005bca
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
e3fb777
perf(optimizer): index table_operations_history for findLatestPerTable
mkuchenbecker May 14, 2026
f89889d
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
d3e1726
refactor(optimizer): enforce layer boundaries in api/ + model/
mkuchenbecker May 14, 2026
db9513a
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
1d469a7
refactor(optimizer): remove db-layer types from optimizer-0
mkuchenbecker May 14, 2026
eee8eca
refactor(optimizer): remove DB schema + schema-init properties
mkuchenbecker May 14, 2026
0567753
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
328e5b9
refactor(optimizer): scrub MySQL / JPA / datasource references
mkuchenbecker May 14, 2026
f7a5d20
refactor(optimizer): drop UpsertTableOperationsRequest
mkuchenbecker May 14, 2026
2a532b5
refactor(optimizer): drop JobResult from the wire and internal model
mkuchenbecker May 14, 2026
2e3a231
feat(optimizer): add debug echo fields to CompleteOperationRequest
mkuchenbecker May 14, 2026
db5eb29
refactor(optimizer): move application.properties out of optimizer-0
mkuchenbecker May 14, 2026
bbcf84a
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
ac3abc0
feat(optimizer): introduce db/ layer with per-layer types
mkuchenbecker May 14, 2026
e79eec7
refactor(optimizer): split TableStats envelope into snapshot + delta …
mkuchenbecker May 14, 2026
f955ded
fix(optimizer): drop CommitDeltaMetrics from TableStatsRow
mkuchenbecker May 14, 2026
861b584
feat(optimizer): extend model layer for service-only types
mkuchenbecker May 14, 2026
41d4c6d
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
b60a3bf
feat(optimizer): extend ModelDbMapper for service-only types
mkuchenbecker May 14, 2026
25d98aa
feat(optimizer): restore batch CAS methods on TableOperationsRepository
mkuchenbecker May 14, 2026
188713d
docs(optimizer): comment every field on opt-0 api/ and model/ types
mkuchenbecker May 14, 2026
f060b5e
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
8d64273
refactor(optimizer): remove clusterId from SnapshotMetrics
mkuchenbecker May 14, 2026
ee7bcab
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
c1ad246
refactor(optimizer): comment every db/ field; drop clusterId and version
mkuchenbecker May 14, 2026
c72aae8
refactor(optimizer): move api↔model conversion onto api types; delete…
mkuchenbecker May 14, 2026
1fca287
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
8ae8777
refactor(optimizer): move model↔db conversion onto model types; delet…
mkuchenbecker May 14, 2026
af23d5e
fix(optimizer): make TableStats self-describing; route DTO conversion…
mkuchenbecker May 15, 2026
3864e42
chore(optimizer): cascade self-describing TableStats from opt-0 to opt-1
mkuchenbecker May 15, 2026
a6045b5
feat(optimizer): add TableStats↔TableStatsRow conversion on model
mkuchenbecker May 15, 2026
3aebf64
chore(optimizer): enable toBuilder on model.Table and model.TableOper…
mkuchenbecker May 15, 2026
bf30f86
chore(optimizer): cascade toBuilder annotations from opt-0 to opt-1
mkuchenbecker May 15, 2026
b6c7f42
refactor(optimizer): drop fileCount enrichment from model.TableOperation
mkuchenbecker May 18, 2026
177af95
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 18, 2026
2b06c92
feat(repo): add findClaimedIds for transactional batch-claim verifica…
mkuchenbecker May 18, 2026
437a0ed
refactor(optimizer): add Dto suffix to all api/model classes (PR #527…
mkuchenbecker May 19, 2026
aabb51c
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 19, 2026
4f98c22
refactor(optimizer): rename api.model package to api.spec (PR #527 re…
mkuchenbecker May 19, 2026
2c26872
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 19, 2026
b31decf
refactor(optimizer): move Dto suffix from api/spec to model
mkuchenbecker May 20, 2026
caf3294
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 20, 2026
4e86569
feat(optimizer): propagate jobId through model + api conversions
mkuchenbecker May 20, 2026
cc8aa80
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 20, 2026
efcceea
feat(optimizer): propagate jobId through model ↔ db conversions
mkuchenbecker May 20, 2026
1fe71f0
refactor(optimizer): rename CompleteOperationRequest → UpdateOperatio…
mkuchenbecker May 20, 2026
fb5e726
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 20, 2026
b96c388
Merge remote-tracking branch 'linkedin/main' into mkuchenb/optimizer-1
mkuchenbecker May 20, 2026
d65b511
refactor(optimizer-repo): unify find/updateBatch with Optional params
mkuchenbecker May 21, 2026
b69e09a
test(optimizer-repo): truncate Instant to micros for CI precision
mkuchenbecker May 21, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ tasks.register('CopyGitHooksTask', Copy) {
// tables-service.Dockerfile -> :services:tables:bootJar
// housetables-service.Dockerfile -> :services:housetables:bootJar
// jobs-service.Dockerfile -> :services:jobs:bootJar
// optimizer-service.Dockerfile -> :services:optimizer:bootJar
// jobs-scheduler.Dockerfile -> :apps:openhouse-spark-apps_2.12:shadowJar (uber JAR)
// spark-base-hadoop2.8.dockerfile ->
// :integrations:spark:spark-3.1:openhouse-spark-runtime_2.12:shadowJar (uber JAR)
Expand All @@ -196,6 +197,7 @@ tasks.register('dockerPrereqs') {
dependsOn ':services:tables:bootJar'
dependsOn ':services:housetables:bootJar'
dependsOn ':services:jobs:bootJar'
dependsOn ':services:optimizer:bootJar'

// Spark runtime uber JARs (shadowJar)
dependsOn ':integrations:spark:spark-3.1:openhouse-spark-runtime_2.12:shadowJar'
Expand All @@ -219,6 +221,7 @@ tasks.register('dockerPrereqs') {
println ' build/tables/libs/tables.jar'
println ' build/housetables/libs/housetables.jar'
println ' build/jobs/libs/jobs.jar'
println ' build/optimizer/libs/optimizer.jar'
println ' build/openhouse-spark-runtime_2.12/libs/openhouse-spark-runtime_2.12-uber.jar'
println ' build/openhouse-spark-3.5-runtime_2.12/libs/openhouse-spark-3.5-runtime_2.12-uber.jar'
println ' build/openhouse-spark-apps_2.12/libs/openhouse-spark-apps_2.12-uber.jar'
Expand Down
17 changes: 17 additions & 0 deletions services/optimizer/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Build script for the optimizer service module (a Spring Boot 2.7.8 application).
plugins {
// Project convention plugin; its contents are defined elsewhere in the build.
id 'openhouse.springboot-ext-conventions'
// Spring Boot Gradle plugin; supplies the bootJar task for this module.
id 'org.springframework.boot' version '2.7.8'
}

dependencies {
// Persistence (Spring Data JPA) and HTTP (Spring Web) starters, pinned to the plugin's version.
implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8'
// Additional Hibernate column types (artifact targets Hibernate 5.5/5.6).
implementation 'com.vladmihalcea:hibernate-types-55:2.21.1'
implementation 'org.springframework.boot:spring-boot-starter-web:2.7.8'
// NOTE(review): '8.+' is a dynamic version, so the resolved driver can change between builds.
// Consider pinning an exact version for reproducibility; also confirm whether the driver is
// needed at compile time or could be declared runtimeOnly.
implementation 'mysql:mysql-connector-java:8.+'
// Test-only dependencies: H2 database (presumably backing repository tests; confirm) and the
// Spring Boot test starter.
testImplementation 'com.h2database:h2:2.2.224'
testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8'
}

// Run tests on the JUnit Platform (JUnit 5) instead of Gradle's default JUnit 4 runner.
test {
useJUnitPlatform()
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package com.linkedin.openhouse.optimizer;

import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;

/**
 * Spring Boot entry point for the Optimizer Service.
 *
 * <p>{@code @SpringBootApplication} enables auto-configuration and component scanning rooted at
 * this class's package, {@code com.linkedin.openhouse.optimizer}.
 */
@SpringBootApplication
public class OptimizerServiceApplication {

/**
 * Boots the Spring application context for the Optimizer Service.
 *
 * @param args command-line arguments, handed unchanged to {@code SpringApplication.run}
 */
public static void main(String[] args) {
SpringApplication.run(OptimizerServiceApplication.class, args);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package com.linkedin.openhouse.optimizer.api.model;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Request body for {@code POST /v1/table-operations/complete}.
 *
 * <p>Reports the outcome of a single completed operation. The service looks up the operation row by
 * {@link #operationId} and writes a history entry for it.
 *
 * <p>A single Spark job typically processes N tables and yields N independent status outcomes, one
 * per operation. Callers issue one complete request per operation; the service does not
 * bulk-complete by job.
 *
 * <p>The remaining fields ({@link #tableUuid}, {@link #databaseName}, {@link #tableName}, {@link
 * #operationType}) are debug-only echo information. The server does not key off them; they are
 * preserved on log lines and traces so an operator looking at a failing complete call can see which
 * (db, table, operation) the caller believed it was completing without joining back to the
 * operation row.
 *
 * <p>Lombok generates the accessors, {@code equals}/{@code hashCode}/{@code toString}, a builder,
 * and both a no-args and an all-args constructor. The all-args constructor follows the field
 * declaration order below, so reordering fields changes its signature.
 *
 * <p>NOTE(review): no field declares a validation constraint, so nothing in this class rejects a
 * {@code null} {@link #operationId} or {@link #status}; presumably the request handler checks
 * them. Confirm.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class CompleteOperationRequest {

/** UUID of the operation row being completed; the primary (and only) lookup key. */
private String operationId;

/** Terminal outcome ({@code SUCCESS} or {@code FAILED}) for this single operation. */
private HistoryStatus status;

/** Debug echo: stable table identity the caller believed it was completing. */
private String tableUuid;

/** Debug echo: database name the caller believed it was completing. */
private String databaseName;

/** Debug echo: table name the caller believed it was completing. */
private String tableName;

/** Debug echo: operation type the caller believed it was completing. */
private OperationType operationType;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package com.linkedin.openhouse.optimizer.api.model;

/** Wire-level terminal outcome of a completed Spark maintenance job. */
public enum HistoryStatus {

  /** The operation's Spark job finished successfully. */
  SUCCESS,

  /** The operation's Spark job failed. */
  FAILED;

  /**
   * Translates this api-layer constant into the internal-model enum, matching by constant name.
   *
   * @return the internal-model constant carrying the same name
   * @throws IllegalArgumentException if the internal-model enum has no constant with this name
   */
  public com.linkedin.openhouse.optimizer.model.HistoryStatus toModel() {
    final String constantName = name();
    return com.linkedin.openhouse.optimizer.model.HistoryStatus.valueOf(constantName);
  }

  /**
   * Translates an internal-model constant into the api-layer enum, matching by constant name.
   *
   * @param v internal-model value; may be {@code null}
   * @return the api-layer constant with the same name, or {@code null} when {@code v} is null
   * @throws IllegalArgumentException if this enum has no constant with that name
   */
  public static HistoryStatus fromModel(com.linkedin.openhouse.optimizer.model.HistoryStatus v) {
    if (v == null) {
      return null;
    }
    return valueOf(v.name());
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package com.linkedin.openhouse.optimizer.api.model;

/** Wire-level lifecycle state of a table operation recommendation. */
public enum OperationStatus {

  /** The Analyzer has recommended the operation; the Scheduler has not claimed it yet. */
  PENDING,

  /** The Scheduler has claimed the operation and is awaiting a job ID from the Jobs Service. */
  SCHEDULING,

  /** The job has been submitted to the Jobs Service and the row carries a {@code jobId}. */
  SCHEDULED,

  /**
   * Set by the Scheduler on older duplicate PENDING rows for the same {@code (table_uuid,
   * operation_type)}. Only the most recent PENDING row is claimed; the older duplicates are
   * CANCELED before the claim step.
   */
  CANCELED;

  /**
   * Translates this api-layer constant into the internal-model enum, matching by constant name.
   *
   * @return the internal-model constant carrying the same name
   * @throws IllegalArgumentException if the internal-model enum has no constant with this name
   */
  public com.linkedin.openhouse.optimizer.model.OperationStatus toModel() {
    final String constantName = name();
    return com.linkedin.openhouse.optimizer.model.OperationStatus.valueOf(constantName);
  }

  /**
   * Translates an internal-model constant into the api-layer enum, matching by constant name.
   *
   * @param v internal-model value; may be {@code null}
   * @return the api-layer constant with the same name, or {@code null} when {@code v} is null
   * @throws IllegalArgumentException if this enum has no constant with that name
   */
  public static OperationStatus fromModel(
      com.linkedin.openhouse.optimizer.model.OperationStatus v) {
    if (v == null) {
      return null;
    }
    return valueOf(v.name());
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package com.linkedin.openhouse.optimizer.api.model;

/** Wire-level maintenance operation types supported by the continuous optimizer. */
public enum OperationType {

  /** Deletes orphaned data files that table metadata no longer references. */
  ORPHAN_FILES_DELETION;

  /**
   * Translates this api-layer constant into the internal-model enum, matching by constant name.
   *
   * @return the internal-model constant carrying the same name
   * @throws IllegalArgumentException if the internal-model enum has no constant with this name
   */
  public com.linkedin.openhouse.optimizer.model.OperationType toModel() {
    final String constantName = name();
    return com.linkedin.openhouse.optimizer.model.OperationType.valueOf(constantName);
  }

  /**
   * Translates an internal-model constant into the api-layer enum, matching by constant name.
   *
   * @param v internal-model value; may be {@code null}
   * @return the api-layer constant with the same name, or {@code null} when {@code v} is null
   * @throws IllegalArgumentException if this enum has no constant with that name
   */
  public static OperationType fromModel(com.linkedin.openhouse.optimizer.model.OperationType v) {
    if (v == null) {
      return null;
    }
    return valueOf(v.name());
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package com.linkedin.openhouse.optimizer.api.model;

import com.linkedin.openhouse.optimizer.model.TableOperation;
import java.time.Instant;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;

/** DTO for {@code table_operations} — Analyzer recommendations read by the Scheduler. */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class TableOperationsDto {

/** Client-generated UUID identifying this specific operation recommendation. */
private String id;

/** Stable table identity from the Tables Service. */
private String tableUuid;

/** Denormalized database name for display; not part of the primary key. */
private String databaseName;

/** Denormalized table name for display; not part of the primary key. */
private String tableName;

/** The type of maintenance operation (e.g. ORPHAN_FILES_DELETION). */
private OperationType operationType;

/** {@code PENDING} or {@code SCHEDULED}. Defaults to {@code PENDING} on creation. */
private OperationStatus status;

/** Server-set when the row is first created by the Analyzer. */
private Instant createdAt;

/** Set by the Scheduler when claiming; {@code null} while PENDING. */
private Instant scheduledAt;

/** Job ID returned by the Jobs Service after successful submission. */
private String jobId;

/** Convert to the internal-model counterpart. */
public TableOperation toModel() {
return TableOperation.builder()
.id(id)
.tableUuid(tableUuid)
.databaseName(databaseName)
.tableName(tableName)
.operationType(operationType == null ? null : operationType.toModel())
.status(status == null ? null : status.toModel())
.createdAt(createdAt)
.scheduledAt(scheduledAt)
.build();
}

/** Build a wire DTO from the internal-model counterpart. */
public static TableOperationsDto fromModel(TableOperation op) {
if (op == null) {
return null;
}
return TableOperationsDto.builder()
.id(op.getId())
.tableUuid(op.getTableUuid())
.databaseName(op.getDatabaseName())
.tableName(op.getTableName())
.operationType(OperationType.fromModel(op.getOperationType()))
.status(OperationStatus.fromModel(op.getStatus()))
.createdAt(op.getCreatedAt())
.scheduledAt(op.getScheduledAt())
.build();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package com.linkedin.openhouse.optimizer.api.model;

import com.linkedin.openhouse.optimizer.model.TableOperationsHistory;
import java.time.Instant;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;

/** DTO for {@code table_operations_history} — append-only operation results. */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class TableOperationsHistoryDto {

/** Same UUID as the originating {@code table_operations.id}; supplied by the caller. */
private String id;

/** Stable table identity from the Tables Service. */
private String tableUuid;

/** Denormalized database name for display. */
private String databaseName;

/** Denormalized table name for display. */
private String tableName;

/** The type of maintenance operation this history row records. */
private OperationType operationType;

/** When the operation completed, as recorded by the complete endpoint. */
private Instant completedAt;

/** {@code SUCCESS} or {@code FAILED}. */
private HistoryStatus status;

/** Convert to the internal-model counterpart. */
public TableOperationsHistory toModel() {
return TableOperationsHistory.builder()
.id(id)
.tableUuid(tableUuid)
.databaseName(databaseName)
.tableName(tableName)
.operationType(operationType == null ? null : operationType.toModel())
.completedAt(completedAt)
.status(status == null ? null : status.toModel())
.build();
}

/** Build a wire DTO from the internal-model counterpart. */
public static TableOperationsHistoryDto fromModel(TableOperationsHistory h) {
if (h == null) {
return null;
}
return TableOperationsHistoryDto.builder()
.id(h.getId())
.tableUuid(h.getTableUuid())
.databaseName(h.getDatabaseName())
.tableName(h.getTableName())
.operationType(OperationType.fromModel(h.getOperationType()))
.completedAt(h.getCompletedAt())
.status(HistoryStatus.fromModel(h.getStatus()))
.build();
}
}
Loading