Skip to content

Commit 44f9c5b

Browse files
Check for app-limited at the beginning of the packet recv / send loop
The new logic matches the app-limited detection algorithm described in the BBR RFC draft, which involves checking for the previous state of the connection before timeout and ack processing.
1 parent d63a1dd commit 44f9c5b

17 files changed

Lines changed: 420 additions & 30 deletions

File tree

quiche/src/lib.rs

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -575,6 +575,7 @@ pub struct Config {
575575
grease: bool,
576576

577577
cc_algorithm: CongestionControlAlgorithm,
578+
enable_bbr_app_limited_fix: bool,
578579
custom_bbr_params: Option<BbrParams>,
579580
initial_congestion_window_packets: usize,
580581
enable_relaxed_loss_threshold: bool,
@@ -653,6 +654,7 @@ impl Config {
653654
application_protos: Vec::new(),
654655
grease: true,
655656
cc_algorithm: CongestionControlAlgorithm::CUBIC,
657+
enable_bbr_app_limited_fix: false,
656658
custom_bbr_params: None,
657659
initial_congestion_window_packets:
658660
DEFAULT_INITIAL_CONGESTION_WINDOW_PACKETS,
@@ -1067,6 +1069,15 @@ impl Config {
10671069
self.cc_algorithm = algo;
10681070
}
10691071

1072+
/// Enable a rework of how the BBR congestion control implementation
1073+
/// computes app-limited. This config parameter will be removed after we
1074+
/// build confidence on the new implementation or decide to roll it back.
1075+
///
1076+
/// Defaults to `false`.
1077+
pub fn set_enable_bbr_app_limited_fix(&mut self, value: bool) {
1078+
self.enable_bbr_app_limited_fix = value;
1079+
}
1080+
10701081
/// Sets custom BBR settings.
10711082
///
10721083
/// This API is experimental and will be removed in the future.
@@ -2233,6 +2244,32 @@ impl<F: BufFactory> Connection<F> {
22332244
Ok(())
22342245
}
22352246

2247+
/// Enable a rework of how the BBR congestion control implementation
2248+
/// computes app-limited. This config parameter will be removed after we
2249+
/// build confidence on the new implementation or decide to roll it back.
2250+
///
2251+
/// Currently this only applies if cc_algorithm is
2252+
/// `CongestionControlAlgorithm::Bbr2Gcongestion`.
2253+
///
2254+
/// This function can only be called inside one of BoringSSL's handshake
2255+
/// callbacks, before any packet has been sent. Calling this function any
2256+
/// other time will have no effect.
2257+
///
2258+
/// See [`Config::set_enable_bbr_app_limited_fix()`].
2259+
///
2260+
/// [`Config::set_enable_bbr_app_limited_fix()`]: struct.Config.html#method.set_enable_bbr_app_limited_fix
2261+
#[cfg(feature = "boringssl-boring-crate")]
2262+
#[cfg_attr(docsrs, doc(cfg(feature = "boringssl-boring-crate")))]
2263+
pub fn set_enable_bbr_app_limited_fix_in_handshake(
2264+
ssl: &mut boring::ssl::SslRef, value: bool,
2265+
) -> Result<()> {
2266+
let ex_data = tls::ExData::from_ssl_ref(ssl).ok_or(Error::TlsFail)?;
2267+
2268+
ex_data.recovery_config.enable_bbr_app_limited_fix = value;
2269+
2270+
Ok(())
2271+
}
2272+
22362273
/// Sets the congestion control algorithm used by string.
22372274
///
22382275
/// This function can only be called inside one of BoringSSL's handshake
@@ -2494,6 +2531,51 @@ impl<F: BufFactory> Connection<F> {
24942531
Ok(())
24952532
}
24962533

2534+
/// Returns true if at least 1 stream has headers or body data to write.
2535+
pub fn has_flushable_stream(&self) -> bool {
2536+
self.streams.has_flushable()
2537+
}
2538+
2539+
/// Signal the start of an iteration of the event loop that will
2540+
/// receive packets and then attempt to send packets.
2541+
///
2542+
/// This hook is used to implement the "Detecting application-limited
2543+
/// phases" logic described in the BBR RFC draft which is described at:
2544+
/// https://www.ietf.org/archive/id/draft-ietf-ccwg-bbr-04.html#name-detecting-application-limit
2545+
///
2546+
/// This function must be invoked at the beginning of each iteration of the
2547+
/// connection work loop, before receiving packets from the network and
2548+
/// processing ACKs/timeouts.
2549+
///
2550+
/// The expected work loop order is:
2551+
/// 1. Call work_loop_round_start with the value of has_flushable_stream
2552+
/// from the end of the previous loop, before polling for additional
2553+
/// application data.
2554+
/// 2. Process timeouts by calling conn.on_timeout().
2555+
/// 3. Call conn.recv() to process received packets which contain ACKs and
2556+
/// stream data.
2557+
/// 4. Call conn.send_on_path() to generate new packets and send them.
2558+
/// 5. Save the value of conn.has_flushable_stream() for the next iteration.
2559+
/// 6. Poll for additional data from upstream or wait for the next send
2560+
/// timeout.
2561+
///
2562+
/// In cases where the new packet generate + send loop yields early due to
2563+
/// an interation limit or the sendmsg on the socket returning EAGAIN, we
2564+
/// should expect had_flushable_stream_before_poll to remain true since the
2565+
/// connection wasn't able to send all the available data despite not
2566+
/// consuming the full congestion window. The call to
2567+
/// bbr_check_if_app_limited() will not mark the connection app-limited
2568+
/// at the last-sent-packet-number because had_flushable_stream_before_poll
2569+
/// is true; this is correct and expected behavior.
2570+
pub fn work_loop_round_start(
2571+
&mut self, had_flushable_stream_before_poll: bool, now: &Instant,
2572+
) {
2573+
for (_, path) in self.paths.iter_mut() {
2574+
path.recovery
2575+
.bbr_check_if_app_limited(had_flushable_stream_before_poll, now);
2576+
}
2577+
}
2578+
24972579
/// Processes QUIC packets received from the peer.
24982580
///
24992581
/// On success the number of bytes processed from the input buffer is

quiche/src/recovery/congestion/recovery.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -956,7 +956,6 @@ impl RecoveryOps for LegacyRecovery {
956956
self.detect_lost_packets(epoch, now, "")
957957
}
958958

959-
// FIXME only used by gcongestion
960959
fn on_app_limited(&mut self) {
961960
// Not implemented for legacy recovery, update_app_limited and
962961
// delivery_rate_update_app_limited used instead.
@@ -1035,6 +1034,13 @@ impl RecoveryOps for LegacyRecovery {
10351034
false
10361035
}
10371036

1037+
fn bbr_check_if_app_limited(
1038+
&mut self, _had_flushable_stream_before_poll: bool, _now: &Instant,
1039+
) {
1040+
// Not implemented -- only used by the BBR implementation
1041+
// which lives in the gcongestion directory.
1042+
}
1043+
10381044
fn lost_count(&self) -> usize {
10391045
self.congestion.lost_count
10401046
}

quiche/src/recovery/gcongestion/bbr/bandwidth_sampler.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -501,7 +501,7 @@ impl BandwidthSampler {
501501
}
502502
}
503503

504-
#[allow(dead_code)]
504+
#[cfg(test)]
505505
pub(crate) fn is_app_limited(&self) -> bool {
506506
self.is_app_limited
507507
}

quiche/src/recovery/gcongestion/bbr2.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -739,8 +739,9 @@ impl CongestionControl for BBRv2 {
739739
self.last_quiescence_start.is_none()
740740
}
741741

742-
fn is_cwnd_limited(&self, bytes_in_flight: usize) -> bool {
743-
bytes_in_flight >= self.get_congestion_window()
742+
#[cfg(test)]
743+
fn is_app_limited(&self) -> bool {
744+
self.mode.network_model().is_app_limited()
744745
}
745746

746747
fn pacing_rate(

quiche/src/recovery/gcongestion/bbr2/network_model.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -753,6 +753,11 @@ impl BBRv2NetworkModel {
753753
self.bandwidth_sampler.on_app_limited()
754754
}
755755

756+
#[cfg(test)]
757+
pub(super) fn is_app_limited(&self) -> bool {
758+
self.bandwidth_sampler.is_app_limited()
759+
}
760+
756761
pub(super) fn loss_events_in_round(&self) -> usize {
757762
self.loss_events_in_round
758763
}

quiche/src/recovery/gcongestion/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,8 @@ pub(super) trait CongestionControl: Debug {
110110

111111
fn is_in_recovery(&self) -> bool;
112112

113-
#[allow(dead_code)]
114-
fn is_cwnd_limited(&self, bytes_in_flight: usize) -> bool;
113+
#[cfg(test)]
114+
fn is_app_limited(&self) -> bool;
115115

116116
fn pacing_rate(
117117
&self, bytes_in_flight: usize, rtt_stats: &RttStats,

quiche/src/recovery/gcongestion/pacer.rs

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -273,12 +273,7 @@ impl Pacer {
273273
}
274274

275275
#[cfg(test)]
276-
pub fn is_app_limited(&self, bytes_in_flight: usize) -> bool {
277-
!self.is_cwnd_limited(bytes_in_flight)
278-
}
279-
280-
#[cfg(test)]
281-
fn is_cwnd_limited(&self, bytes_in_flight: usize) -> bool {
282-
!self.pacing_limited && self.sender.is_cwnd_limited(bytes_in_flight)
276+
pub fn is_app_limited(&self) -> bool {
277+
self.sender.is_app_limited()
283278
}
284279
}

quiche/src/recovery/gcongestion/recovery.rs

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ use crate::recovery::RecoveryConfig;
3131
use crate::recovery::RecoveryOps;
3232
use crate::recovery::RecoveryStats;
3333
use crate::recovery::ReleaseDecision;
34+
use crate::recovery::ReleaseTime;
3435
use crate::recovery::Sent;
3536
use crate::recovery::StartupExit;
3637
use crate::recovery::GRANULARITY;
@@ -466,6 +467,8 @@ pub struct GRecovery {
466467
lost_reuse: Vec<Lost>,
467468

468469
pacer: Pacer,
470+
471+
enable_bbr_app_limited_fix: bool,
469472
}
470473

471474
impl GRecovery {
@@ -520,6 +523,8 @@ impl GRecovery {
520523

521524
newly_acked: Vec::new(),
522525
lost_reuse: Vec::new(),
526+
enable_bbr_app_limited_fix: recovery_config
527+
.enable_bbr_app_limited_fix,
523528
})
524529
}
525530

@@ -639,6 +644,14 @@ impl GRecovery {
639644
self.loss_timer.update(timeout);
640645
}
641646
}
647+
648+
fn next_release_time_in_past(&self, now: &Instant) -> bool {
649+
let next_release_time = self.pacer.get_next_release_time();
650+
match next_release_time.time {
651+
ReleaseTime::Immediate => true,
652+
ReleaseTime::At(time) => time.lt(now),
653+
}
654+
}
642655
}
643656

644657
impl RecoveryOps for GRecovery {
@@ -1035,7 +1048,9 @@ impl RecoveryOps for GRecovery {
10351048

10361049
// FIXME only used by gcongestion
10371050
fn on_app_limited(&mut self) {
1038-
self.pacer.on_app_limited(self.bytes_in_flight.get())
1051+
if !self.enable_bbr_app_limited_fix {
1052+
self.pacer.on_app_limited(self.bytes_in_flight.get())
1053+
}
10391054
}
10401055

10411056
#[cfg(test)]
@@ -1099,7 +1114,7 @@ impl RecoveryOps for GRecovery {
10991114

11001115
#[cfg(test)]
11011116
fn app_limited(&self) -> bool {
1102-
self.pacer.is_app_limited(self.bytes_in_flight.get())
1117+
self.pacer.is_app_limited()
11031118
}
11041119

11051120
// FIXME only used by congestion
@@ -1124,6 +1139,47 @@ impl RecoveryOps for GRecovery {
11241139
true
11251140
}
11261141

1142+
fn bbr_check_if_app_limited(
1143+
&mut self, had_flushable_stream_before_poll: bool, now: &Instant,
1144+
) {
1145+
if !self.enable_bbr_app_limited_fix {
1146+
return;
1147+
}
1148+
1149+
// Implements CheckIfApplicationLimited from the BBR RFC.
1150+
//
1151+
// had_flushable_stream_before_poll is used to check for `NoUnsentData()`
1152+
// and skips this check there is data on the connection's tx
1153+
// buffer. Retransmisions are done with the help of stream send
1154+
// buffers so the `NoUnsentData()` check also covers the
1155+
// `C.lost_out <= C.retrans_out` check.
1156+
//
1157+
// The cwnd vs inflight check needs to take frame overheads
1158+
// into account since due to these overheads it may not be
1159+
// possible for the connection to use every last byte of the
1160+
// available window. The check could be relaxed further by
1161+
// asserting that cwnd - inflight is less than 1 packet's
1162+
// worth of bytes.
1163+
//
1164+
// The check for C.pending_transmissions == 0 is done by the
1165+
// call to next_release_time_in_past; this method returns true
1166+
// when lower layers and the pacer would send a packet on the
1167+
// network immediately if one were provided.
1168+
//
1169+
// The case of Immediate next_release_time results in the work_loop
1170+
// iterating until min(cwnd, tx buffer) is exhausted. If we decide to
1171+
// exit the send loop earlier we should mark the connection in some way so
1172+
// the next call to bbr_check_if_app_limited does not mark the
1173+
// connection app-limited after yielding when C.pending_transmissions > 0.
1174+
if !had_flushable_stream_before_poll &&
1175+
self.cwnd() >
1176+
self.bytes_in_flight.get() + frame::MAX_STREAM_OVERHEAD &&
1177+
self.next_release_time_in_past(now)
1178+
{
1179+
self.pacer.on_app_limited(self.bytes_in_flight.get())
1180+
}
1181+
}
1182+
11271183
#[cfg(feature = "qlog")]
11281184
fn state_str(&self, _now: Instant) -> &'static str {
11291185
self.pacer.state_str()

quiche/src/recovery/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ pub struct RecoveryConfig {
130130
pub max_send_udp_payload_size: usize,
131131
pub max_ack_delay: Duration,
132132
pub cc_algorithm: CongestionControlAlgorithm,
133+
pub enable_bbr_app_limited_fix: bool,
133134
pub custom_bbr_params: Option<BbrParams>,
134135
pub hystart: bool,
135136
pub pacing: bool,
@@ -145,6 +146,7 @@ impl RecoveryConfig {
145146
max_send_udp_payload_size: config.max_send_udp_payload_size,
146147
max_ack_delay: Duration::ZERO,
147148
cc_algorithm: config.cc_algorithm,
149+
enable_bbr_app_limited_fix: config.enable_bbr_app_limited_fix,
148150
custom_bbr_params: config.custom_bbr_params,
149151
hystart: config.hystart,
150152
pacing: config.pacing,
@@ -315,6 +317,10 @@ pub trait RecoveryOps {
315317
fn get_next_release_time(&self) -> ReleaseDecision;
316318

317319
fn gcongestion_enabled(&self) -> bool;
320+
321+
fn bbr_check_if_app_limited(
322+
&mut self, had_flushable_stream_before_poll: bool, now: &Instant,
323+
);
318324
}
319325

320326
impl Recovery {

quiche/src/test_utils.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,9 +340,16 @@ pub fn recv_send<F: BufFactory>(
340340
Ok(off)
341341
}
342342

343+
pub fn run_work_loop_round_start_hook(conn: &mut Connection) {
344+
let has_flushable_stream = conn.has_flushable_stream();
345+
conn.work_loop_round_start(has_flushable_stream, &Instant::now());
346+
}
347+
343348
pub fn process_flight(
344349
conn: &mut Connection, flight: Vec<(Vec<u8>, SendInfo)>,
345350
) -> Result<()> {
351+
run_work_loop_round_start_hook(conn);
352+
346353
for (mut pkt, si) in flight {
347354
let info = RecvInfo {
348355
to: si.to,

0 commit comments

Comments
 (0)