-
Notifications
You must be signed in to change notification settings - Fork 15.2k
KAFKA-19893: Reduce tiered storage redundancy with delayed upload (KIP-1241) #20913
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 139 commits
cb6f22f
4f96275
c410ae3
b765f33
c4a77b9
1b0705e
2cb87c7
aefdac1
3c5131b
44d74ac
791881e
31ecadb
f2b2464
5b059e1
4c108d3
05392d5
60060e2
997ecbd
3398d49
2ae53dd
7ef0a9e
b87f0af
f54c033
ef1ee28
1658a70
d99d7de
a5b6733
8b35773
9ea9ac2
7eefc4c
f18b95a
f0adf67
3d14856
b6c53b6
e19efef
dea59a3
b2d2079
cf80f85
a747a1a
a392516
71cdaf9
b5ad798
eaa2e40
4ffe31f
7ee6b12
b3b1d13
a6bc552
1761b0a
31478c5
d493ba0
f994e15
21200a0
7aa8e44
32ca138
ee9e6ca
95039d4
fc71a83
017deec
5b223b1
f458edd
2087e06
e550341
7574235
f2ffe86
95d096b
b5aa4a5
0c4c7c3
f9afae2
1c251f0
7148ac5
47a14ab
c89f5ed
cf52590
7bbed2e
7a4ef68
fa34452
8f2c427
ab81e29
2b5fede
4050a5b
a1adb06
f2a1949
4dfa05b
4dbd1de
5573462
a5dbf91
3ac973d
075ea73
6aa8c91
7fe96a4
571280a
bfc7530
a06de2b
9115afd
a23d2ea
3d966a1
fba4cea
0a7038b
f377844
02819b2
3d7c5c9
79d9587
d85551e
bc5b395
eec8220
1122220
2fa65e7
e1a9c99
6d481c5
b302dac
c32d37b
31c686a
8036c81
abd0563
33944d4
ec8ff4f
06a93df
a20e4d2
3cd8b26
8a0a031
f26f152
5a68341
3cb22a1
d8b0c67
ed9fc99
078db33
2fa0c23
1cf1c3b
9b5e899
f25f002
6b626f6
88dbcc7
cf7e873
d494963
4311f2c
1855a55
eef02ef
41a47dd
69c9c9b
5880721
8f23a7c
6d218ff
d478387
134337d
499cb59
cbf725c
40d91a7
20ca059
8f4cdd7
3f0764f
a8ad430
367390a
dbaa4d6
9cfbc5e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -770,6 +770,55 @@ class DynamicBrokerConfigTest { | |
| verifyIncorrectLogLocalRetentionProps(2000L, 1000L, -1, 100) | ||
| } | ||
|
|
||
| @Test | ||
| def testDynamicRemoteCopyLagThrowsOnIncorrectConfig(): Unit = { | ||
| // remote copy lag ms cannot exceed effective local retention ms | ||
| verifyIncorrectRemoteCopyLagProps( | ||
| retentionMs = 1000L, | ||
| logLocalRetentionMs = -2L, | ||
| remoteCopyLagMs = 1001L, | ||
| retentionBytes = 1000L, | ||
| logLocalRetentionBytes = -2L, | ||
| remoteCopyLagBytes = 100L | ||
| ) | ||
|
|
||
| // remote copy lag bytes cannot exceed effective local retention bytes | ||
| verifyIncorrectRemoteCopyLagProps( | ||
| retentionMs = 1000L, | ||
| logLocalRetentionMs = -2L, | ||
| remoteCopyLagMs = 100L, | ||
| retentionBytes = 1000L, | ||
| logLocalRetentionBytes = -2L, | ||
| remoteCopyLagBytes = 1001L | ||
| ) | ||
|
|
||
| } | ||
|
|
||
| def verifyIncorrectRemoteCopyLagProps(retentionMs: Long, | ||
| logLocalRetentionMs: Long, | ||
| remoteCopyLagMs: Long, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: rename remoteCopyLagMs -> logRemoteCopyLagMs and remoteCopyLagBytes -> logRemoteCopyLagBytes
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| retentionBytes: Long, | ||
| logLocalRetentionBytes: Long, | ||
| remoteCopyLagBytes: Long): Unit = { | ||
| val props = TestUtils.createBrokerConfig(0, port = 8181) | ||
| props.put(ServerLogConfigs.LOG_RETENTION_TIME_MILLIS_CONFIG, retentionMs.toString) | ||
| props.put(ServerLogConfigs.LOG_RETENTION_BYTES_CONFIG, retentionBytes.toString) | ||
| val config = KafkaConfig(props) | ||
| val dynamicLogConfig = new DynamicLogConfig(mock(classOf[LogManager]), mock(classOf[DirectoryEventHandler])) | ||
| config.dynamicConfig.initialize(None) | ||
| config.dynamicConfig.addBrokerReconfigurable(dynamicLogConfig) | ||
|
|
||
| val newProps = new Properties() | ||
| newProps.put(RemoteLogManagerConfig.LOG_LOCAL_RETENTION_MS_PROP, logLocalRetentionMs.toString) | ||
| newProps.put(RemoteLogManagerConfig.LOG_REMOTE_COPY_LAG_MS_PROP, remoteCopyLagMs.toString) | ||
| newProps.put(RemoteLogManagerConfig.LOG_LOCAL_RETENTION_BYTES_PROP, logLocalRetentionBytes.toString) | ||
| newProps.put(RemoteLogManagerConfig.LOG_REMOTE_COPY_LAG_BYTES_PROP, remoteCopyLagBytes.toString) | ||
| // validate default config | ||
| assertThrows(classOf[ConfigException], () => config.dynamicConfig.validate(newProps, perBrokerConfig = false)) | ||
| // validate per broker config | ||
| assertThrows(classOf[ConfigException], () => config.dynamicConfig.validate(newProps, perBrokerConfig = true)) | ||
| } | ||
|
|
||
| @Test | ||
| def testDynamicRemoteFetchMaxWaitMsConfig(): Unit = { | ||
| val props = TestUtils.createBrokerConfig(0, port = 8181) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -66,6 +66,7 @@ | |
| import org.apache.kafka.storage.internals.log.AsyncOffsetReader; | ||
| import org.apache.kafka.storage.internals.log.EpochEntry; | ||
| import org.apache.kafka.storage.internals.log.FetchDataInfo; | ||
| import org.apache.kafka.storage.internals.log.LogConfig; | ||
| import org.apache.kafka.storage.internals.log.LogOffsetMetadata; | ||
| import org.apache.kafka.storage.internals.log.LogSegment; | ||
| import org.apache.kafka.storage.internals.log.OffsetIndex; | ||
|
|
@@ -916,6 +917,7 @@ private void maybeUpdateCopiedOffset(UnifiedLog log) throws RemoteStorageExcepti | |
| * 1) Segment is not the active segment and | ||
| * 2) Segment end-offset is less than the last-stable-offset as remote storage should contain only | ||
| * committed/acked messages | ||
| * 3) Segment has exceeded copy lag by time or size when configured (remote.copy.lag.ms, remote.copy.lag.bytes) | ||
| * @param log The log from which the segments are to be copied | ||
| * @param fromOffset The offset from which the segments are to be copied | ||
| * @param lastStableOffset The last stable offset of the log | ||
|
|
@@ -925,10 +927,17 @@ List<EnrichedLogSegment> candidateLogSegments(UnifiedLog log, Long fromOffset, L | |
| List<EnrichedLogSegment> candidateLogSegments = new ArrayList<>(); | ||
| List<LogSegment> segments = log.logSegments(fromOffset, Long.MAX_VALUE); | ||
| if (!segments.isEmpty()) { | ||
| long currentTimeMs = time.milliseconds(); | ||
| long totalLogSize = UnifiedLog.sizeInBytes(segments); | ||
| long cumulativeSize = 0; | ||
| for (int idx = 1; idx < segments.size(); idx++) { | ||
| LogSegment previousSeg = segments.get(idx - 1); | ||
| LogSegment currentSeg = segments.get(idx); | ||
| if (currentSeg.baseOffset() <= lastStableOffset) { | ||
| cumulativeSize += previousSeg.size(); | ||
| if (delayCopy(log.config(), previousSeg, currentTimeMs, totalLogSize, cumulativeSize)) { | ||
| break; | ||
| } | ||
| candidateLogSegments.add(new EnrichedLogSegment(previousSeg, currentSeg.baseOffset())); | ||
| } | ||
| } | ||
|
|
@@ -937,6 +946,68 @@ List<EnrichedLogSegment> candidateLogSegments(UnifiedLog log, Long fromOffset, L | |
| return candidateLogSegments; | ||
| } | ||
|
|
||
| private boolean delayCopy(LogConfig logConfig, LogSegment previousSeg, long currentTimeMs, long totalLogSize, long cumulativeSize) { | ||
| if (logConfig == null) { | ||
|
kamalcph marked this conversation as resolved.
Outdated
|
||
| return false; | ||
| } | ||
|
|
||
| long copyLagMs = logConfig.remoteCopyLagMs(); | ||
| long copyLagBytes = logConfig.remoteCopyLagBytes(); | ||
| if (logger.isTraceEnabled()) { | ||
| logger.trace("delayCopy check for segment {}: copyLagMs={}, copyLagBytes={}, currentTimeMs={}, totalLogSize={}, cumulativeSize={}, sizeLagBytes={}", | ||
| previousSeg, copyLagMs, copyLagBytes, currentTimeMs, totalLogSize, cumulativeSize, totalLogSize - cumulativeSize); | ||
| } | ||
|
|
||
| if (copyLagMs == 0 || copyLagBytes == 0) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we log a warning or info message when a user configures one lag property but leaves the other at its default value of 0? |
||
| return false; | ||
| } | ||
|
|
||
| boolean needCheckCopyLagMs = copyLagMs > 0; | ||
| boolean needCheckCopyLagBytes = copyLagBytes > 0; | ||
|
|
||
| // When no lag delay is enabled, upload immediately. | ||
| if (!needCheckCopyLagMs && !needCheckCopyLagBytes) { | ||
|
jiafu1115 marked this conversation as resolved.
Outdated
|
||
| return false; | ||
| } | ||
|
|
||
| // When both lag delays are enabled, delay upload only if both delay checks decide to delay. | ||
| if (needCheckCopyLagMs && needCheckCopyLagBytes) { | ||
| return notExceededCopyLagTime(previousSeg, currentTimeMs, copyLagMs) && notExceededCopyLagSize(previousSeg, totalLogSize, cumulativeSize, copyLagBytes); | ||
| } | ||
|
|
||
| // If only one lag delay is enabled, use that check as the final result. | ||
| if (needCheckCopyLagMs) { | ||
| return notExceededCopyLagTime(previousSeg, currentTimeMs, copyLagMs); | ||
| } | ||
|
|
||
| return notExceededCopyLagSize(previousSeg, totalLogSize, cumulativeSize, copyLagBytes); | ||
| } | ||
|
|
||
| private boolean notExceededCopyLagTime(LogSegment segment, long currentTimeMs, long copyLagMs) { | ||
| try { | ||
| long segmentAgeMs = currentTimeMs - segment.largestTimestamp(); | ||
| boolean exceeded = segmentAgeMs >= copyLagMs; | ||
| if (logger.isTraceEnabled()) { | ||
| logger.trace("{} eligible for upload by time? {} (segment age {} ms, copy lag {} ms)", | ||
| segment, exceeded, segmentAgeMs, copyLagMs); | ||
| } | ||
| return !exceeded; | ||
| } catch (IOException e) { | ||
| logger.warn("Failed to get largest timestamp for segment {}, take it as eligible for upload based on time", segment, e); | ||
| return false; | ||
| } | ||
| } | ||
|
|
||
| private boolean notExceededCopyLagSize(LogSegment segment, long totalLogSize, long cumulativeSize, long copyLagBytes) { | ||
| long sizeLagBytes = totalLogSize - cumulativeSize; | ||
| boolean exceeded = sizeLagBytes >= copyLagBytes; | ||
| if (logger.isTraceEnabled()) { | ||
| logger.trace("{} eligible for upload by size? {} (size lag {} bytes, copy lag {} bytes, totalLogSize={}, cumulativeSize={})", | ||
| segment, exceeded, sizeLagBytes, copyLagBytes, totalLogSize, cumulativeSize); | ||
| } | ||
| return !exceeded; | ||
| } | ||
|
|
||
| public void copyLogSegmentsToRemote(UnifiedLog log) throws InterruptedException, RetriableRemoteStorageException { | ||
| if (isCancelled()) | ||
| return; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
could you also add one test for valid dynamic broker config change? Thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also, move the
testDynamicRemoteCopyLagThrowsOnIncorrectConfigtest to DynamicBrokerConfigTest.java instead of DynamicBrokerConfigTest.scala.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This can be taken up later, we already have one dynamic broker config change test in KafkaConfigTest.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Got it. thanks @kamalcph
Thank you very much for your patient and thorough review.