Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions ambry-api/src/main/java/com/github/ambry/config/RouterConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,8 @@ public class RouterConfig {
public static final String ROUTER_STORE_KEY_CONVERTER_FACTORY = "router.store.key.converter.factory";
public static final String ROUTER_UNAVAILABLE_DUE_TO_OFFLINE_REPLICAS = "router.unavailable.due.to.offline.replicas";
public static final String ROUTER_NOT_FOUND_CACHE_TTL_IN_MS = "router.not.found.cache.ttl.in.ms";
public static final String ROUTER_NAMED_BLOB_TRANSLATE_NOT_FOUND_TO_UNAVAILABLE_ENABLED =
"router.named.blob.translate.not.found.to.unavailable.enabled";
public static final String ROUTER_UPDATE_OP_METADATA_RELIANCE_TIMESTAMP_IN_MS =
"router.update.op.metadata.reliance.timestamp.in.ms";
public static final String ROUTER_UNAVAILABLE_DUE_TO_SUCCESS_COUNT_IS_NON_ZERO_FOR_DELETE =
Expand Down Expand Up @@ -647,6 +649,17 @@ public class RouterConfig {
@Default("15*1000")
public final long routerNotFoundCacheTtlInMs;

/**
* If {@code true}, when a named blob's metadata row exists but storage replicas return
* {@link com.github.ambry.router.RouterErrorCode#BlobDoesNotExist}, the router translates the
* error to {@link com.github.ambry.router.RouterErrorCode#AmbryUnavailable} (retryable 503)
* instead of letting it surface as an authoritative 404. Acts as a kill switch for the
* behavior added in PR #3234 / AMBRY-14247.
*/
@Config(ROUTER_NAMED_BLOB_TRANSLATE_NOT_FOUND_TO_UNAVAILABLE_ENABLED)
@Default("true")
public final boolean routerNamedBlobTranslateNotFoundToUnavailableEnabled;

public static final String ROUTER_BLOB_METADATA_CACHE_ID = "router.blob.metadata.cache.id";
@Config(ROUTER_BLOB_METADATA_CACHE_ID)
public final String routerBlobMetadataCacheId;
Expand Down Expand Up @@ -935,6 +948,8 @@ public RouterConfig(VerifiableProperties verifiableProperties) {
verifiableProperties.getString(OPERATION_CONTROLLER, "com.github.ambry.router.OperationController");
routerNotFoundCacheTtlInMs = verifiableProperties.getLongInRange(ROUTER_NOT_FOUND_CACHE_TTL_IN_MS, 15 * 1000L, 0,
ROUTER_NOT_FOUND_CACHE_MAX_TTL_IN_MS);
routerNamedBlobTranslateNotFoundToUnavailableEnabled =
verifiableProperties.getBoolean(ROUTER_NAMED_BLOB_TRANSLATE_NOT_FOUND_TO_UNAVAILABLE_ENABLED, true);
routerUpdateOpMetadataRelianceTimestampInMs =
verifiableProperties.getLong(ROUTER_UPDATE_OP_METADATA_RELIANCE_TIMESTAMP_IN_MS,
DEFAULT_ROUTER_UPDATE_OP_METADATA_RELIANCE_TIMESTAMP_IN_MS);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,9 @@ private Exception translateNamedBlobMissingInStorage(String resolvedBlobId, Exce
if (e instanceof RouterException
&& ((RouterException) e).getErrorCode() == RouterErrorCode.BlobDoesNotExist) {
routerMetrics.namedBlobMetadataExistsButStorageNotFoundCount.inc();
if (!routerConfig.routerNamedBlobTranslateNotFoundToUnavailableEnabled) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why don't we just use the config to gate the translateNamedBlobMissingInStorage method?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added the clause here so that the metric on the above line namedBlobMetadataExistsButStorageNotFoundCount still increments when the config is disabled, so that we still have insight into it whenever the metadata-vs-storage divergence happens.

return e;
}
logger.warn("Named blob metadata exists but storage returned BlobNotFound for blob {}; "
+ "translating to AmbryUnavailable (retryable 503)", resolvedBlobId);
return new RouterException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1270,6 +1270,24 @@ public void testNamedBlobMetadataNotFoundStillReturnsNotFound() throws Exception
}
}

/** With the gating config disabled, storage NOT_FOUND surfaces as BlobDoesNotExist (no translation). */
@Test
public void testNamedBlobMissingInStorageNotTranslatedWhenConfigDisabled() throws Exception {
AtomicReference<String> storedBlobId = new AtomicReference<>();
Properties props = getNonBlockingRouterProperties(localDcName);
props.setProperty("router.named.blob.translate.not.found.to.unavailable.enabled", "false");
MockServerLayout layout = setUpNamedBlobAndPut(storedBlobId, props);
try {
layout.getMockServers().forEach(s -> s.setServerErrorForAllRequests(ServerErrorCode.BlobNotFound));
Assert.assertEquals(RouterErrorCode.BlobDoesNotExist,
((RouterException) expectGetFailure("a", "c", "b").getCause()).getErrorCode());
// Metric still increments for observability even when translation is disabled.
Assert.assertEquals(1L, routerMetrics.namedBlobMetadataExistsButStorageNotFoundCount.getCount());
} finally {
if (router != null) { router.close(); assertClosed(); }
}
}

/** Translation also fires when getBlobHelper short-circuits via the notFoundCache. */
@Test
public void testNamedBlobMissingViaNotFoundCacheStillTranslatedToAmbryUnavailable() throws Exception {
Expand All @@ -1288,6 +1306,12 @@ public void testNamedBlobMissingViaNotFoundCacheStillTranslatedToAmbryUnavailabl
/** Wire a mock IdConverter, set up the router, PUT a named blob {@code /a/c/b}, and stub the
* GET-path {@code convert(RestRequest, String)} to return the resolved blob ID. */
private MockServerLayout setUpNamedBlobAndPut(AtomicReference<String> storedBlobId) throws Exception {
return setUpNamedBlobAndPut(storedBlobId, getNonBlockingRouterProperties(localDcName));
}

/** Variant of {@link #setUpNamedBlobAndPut(AtomicReference)} that accepts custom router properties. */
private MockServerLayout setUpNamedBlobAndPut(AtomicReference<String> storedBlobId, Properties props)
throws Exception {
CountDownLatch putLatch = new CountDownLatch(1);
// Wire a mock IdConverterFactory + IdConverter.
IdConverterFactory factory = mock(IdConverterFactory.class);
Expand All @@ -1308,8 +1332,7 @@ private MockServerLayout setUpNamedBlobAndPut(AtomicReference<String> storedBlob
});
// Build the router with the mock factory.
MockServerLayout layout = new MockServerLayout(mockClusterMap);
setRouterWithIdConverterFactory(getNonBlockingRouterProperties(localDcName), layout,
new LoggingNotificationSystem(), factory);
setRouterWithIdConverterFactory(props, layout, new LoggingNotificationSystem(), factory);
// PUT a named blob /a/c/b, then wait for the convert mock to have run.
setOperationParams();
router.putBlob(createNamedBlobRestRequest(RestMethod.PUT, "a", "c", "b", false, false, false, false),
Expand Down
Loading