mirror of https://github.com/grpc/grpc-java.git
xds: Align PriorityLB child selection with A56
The PriorityLB predates A56. tryNextPriority() now matches ChoosePriority() from the gRFC. The biggest change is waiting on CONNECTING children instead of failing after the failOverTimer fires. The failOverTimer should be used to start lower priorities more eagerly, but shouldn't cause the overall connectivity state to become TRANSIENT_FAILURE on its own. The prior behavior of creating the "Connection timeout for priority" failing picker was particularly strange, because it didn't update child's connectivity state. This previous behavior was creating errors because of the failOverTimer with no way to diagnose what was going wrong. b/428517222
This commit is contained in:
parent
6ff8ecac09
commit
c4256add4d
|
@ -154,7 +154,8 @@ final class PriorityLoadBalancer extends LoadBalancer {
|
|||
ChildLbState child =
|
||||
new ChildLbState(priority, priorityConfigs.get(priority).ignoreReresolution);
|
||||
children.put(priority, child);
|
||||
updateOverallState(priority, CONNECTING, new FixedResultPicker(PickResult.withNoResult()));
|
||||
// Child is created in CONNECTING with pending failOverTimer
|
||||
updateOverallState(priority, child.connectivityState, child.picker);
|
||||
// Calling the child's updateResolvedAddresses() can result in tryNextPriority() being
|
||||
// called recursively. We need to be sure to be done with processing here before it is
|
||||
// called.
|
||||
|
@ -173,21 +174,23 @@ final class PriorityLoadBalancer extends LoadBalancer {
|
|||
}
|
||||
return Status.OK;
|
||||
}
|
||||
if (child.failOverTimer != null && child.failOverTimer.isPending()) {
|
||||
if (child.failOverTimer.isPending()) {
|
||||
updateOverallState(priority, child.connectivityState, child.picker);
|
||||
return Status.OK; // Give priority i time to connect.
|
||||
}
|
||||
if (priority.equals(currentPriority) && child.connectivityState != TRANSIENT_FAILURE) {
|
||||
// If the current priority is not changed into TRANSIENT_FAILURE, keep using it.
|
||||
}
|
||||
for (int i = 0; i < priorityNames.size(); i++) {
|
||||
String priority = priorityNames.get(i);
|
||||
ChildLbState child = children.get(priority);
|
||||
if (child.connectivityState.equals(CONNECTING)) {
|
||||
updateOverallState(priority, child.connectivityState, child.picker);
|
||||
return Status.OK;
|
||||
}
|
||||
}
|
||||
// TODO(zdapeng): Include error details of each priority.
|
||||
logger.log(XdsLogLevel.DEBUG, "All priority failed");
|
||||
String lastPriority = priorityNames.get(priorityNames.size() - 1);
|
||||
SubchannelPicker errorPicker = children.get(lastPriority).picker;
|
||||
updateOverallState(lastPriority, TRANSIENT_FAILURE, errorPicker);
|
||||
ChildLbState child = children.get(lastPriority);
|
||||
updateOverallState(lastPriority, child.connectivityState, child.picker);
|
||||
return Status.OK;
|
||||
}
|
||||
|
||||
|
@ -231,10 +234,7 @@ final class PriorityLoadBalancer extends LoadBalancer {
|
|||
// The child is deactivated.
|
||||
return;
|
||||
}
|
||||
picker = new FixedResultPicker(PickResult.withError(
|
||||
Status.UNAVAILABLE.withDescription("Connection timeout for priority " + priority)));
|
||||
logger.log(XdsLogLevel.DEBUG, "Priority {0} failed over to next", priority);
|
||||
currentPriority = null; // reset currentPriority to guarantee failover happen
|
||||
Status status = tryNextPriority();
|
||||
if (!status.isOk()) {
|
||||
// A child had a problem with the addresses/config. Request it to be refreshed
|
||||
|
|
|
@ -376,6 +376,7 @@ public class PriorityLoadBalancerTest {
|
|||
assertThat(fooBalancers).hasSize(2);
|
||||
assertThat(fooHelpers).hasSize(2);
|
||||
LoadBalancer balancer1 = Iterables.getLast(fooBalancers);
|
||||
Helper helper1 = Iterables.getLast(fooHelpers);
|
||||
|
||||
// p1 timeout, and fails over to p2
|
||||
fakeClock.forwardTime(10, TimeUnit.SECONDS);
|
||||
|
@ -423,14 +424,20 @@ public class PriorityLoadBalancerTest {
|
|||
LoadBalancer balancer3 = Iterables.getLast(fooBalancers);
|
||||
Helper helper3 = Iterables.getLast(fooHelpers);
|
||||
|
||||
// p3 timeout then the channel should go to TRANSIENT_FAILURE
|
||||
// p3 timeout then the channel should stay in CONNECTING
|
||||
fakeClock.forwardTime(10, TimeUnit.SECONDS);
|
||||
assertCurrentPickerReturnsError(Status.Code.UNAVAILABLE, "timeout");
|
||||
assertCurrentPicker(CONNECTING, PickResult.withNoResult());
|
||||
|
||||
// p3 fails then the picker should have error status updated
|
||||
// p3 fails then the picker should still be waiting on p1
|
||||
helper3.updateBalancingState(
|
||||
TRANSIENT_FAILURE,
|
||||
new FixedResultPicker(PickResult.withError(Status.DATA_LOSS.withDescription("foo"))));
|
||||
assertCurrentPicker(CONNECTING, PickResult.withNoResult());
|
||||
|
||||
// p1 fails then the picker should have error status updated to p3
|
||||
helper1.updateBalancingState(
|
||||
TRANSIENT_FAILURE,
|
||||
new FixedResultPicker(PickResult.withError(Status.DATA_LOSS.withDescription("bar"))));
|
||||
assertCurrentPickerReturnsError(Status.Code.DATA_LOSS, "foo");
|
||||
|
||||
// p2 gets back to READY
|
||||
|
@ -642,6 +649,7 @@ public class PriorityLoadBalancerTest {
|
|||
assertThat(fooBalancers).hasSize(2);
|
||||
assertThat(fooHelpers).hasSize(2);
|
||||
LoadBalancer balancer1 = Iterables.getLast(fooBalancers);
|
||||
Helper helper1 = Iterables.getLast(fooHelpers);
|
||||
|
||||
// p1 timeout, and fails over to p2
|
||||
fakeClock.forwardTime(10, TimeUnit.SECONDS);
|
||||
|
@ -677,14 +685,20 @@ public class PriorityLoadBalancerTest {
|
|||
LoadBalancer balancer3 = Iterables.getLast(fooBalancers);
|
||||
Helper helper3 = Iterables.getLast(fooHelpers);
|
||||
|
||||
// p3 timeout then the channel should go to TRANSIENT_FAILURE
|
||||
// p3 timeout then the channel should stay in CONNECTING
|
||||
fakeClock.forwardTime(10, TimeUnit.SECONDS);
|
||||
assertCurrentPickerReturnsError(Status.Code.UNAVAILABLE, "timeout");
|
||||
assertCurrentPicker(CONNECTING, PickResult.withNoResult());
|
||||
|
||||
// p3 fails then the picker should have error status updated
|
||||
// p3 fails then the picker should still be waiting on p1
|
||||
helper3.updateBalancingState(
|
||||
TRANSIENT_FAILURE,
|
||||
new FixedResultPicker(PickResult.withError(Status.DATA_LOSS.withDescription("foo"))));
|
||||
assertCurrentPicker(CONNECTING, PickResult.withNoResult());
|
||||
|
||||
// p1 fails then the picker should have error status updated to p3
|
||||
helper1.updateBalancingState(
|
||||
TRANSIENT_FAILURE,
|
||||
new FixedResultPicker(PickResult.withError(Status.DATA_LOSS.withDescription("bar"))));
|
||||
assertCurrentPickerReturnsError(Status.Code.DATA_LOSS, "foo");
|
||||
|
||||
// p2 gets back to IDLE
|
||||
|
@ -863,15 +877,17 @@ public class PriorityLoadBalancerTest {
|
|||
}
|
||||
|
||||
private void assertCurrentPickerPicksSubchannel(Subchannel expectedSubchannelToPick) {
|
||||
assertLatestConnectivityState(READY);
|
||||
PickResult pickResult = pickerCaptor.getValue().pickSubchannel(mock(PickSubchannelArgs.class));
|
||||
assertThat(pickResult.getSubchannel()).isEqualTo(expectedSubchannelToPick);
|
||||
assertCurrentPicker(READY, PickResult.withSubchannel(expectedSubchannelToPick));
|
||||
}
|
||||
|
||||
private void assertCurrentPickerIsBufferPicker() {
|
||||
assertLatestConnectivityState(IDLE);
|
||||
assertCurrentPicker(IDLE, PickResult.withNoResult());
|
||||
}
|
||||
|
||||
private void assertCurrentPicker(ConnectivityState state, PickResult result) {
|
||||
assertLatestConnectivityState(state);
|
||||
PickResult pickResult = pickerCaptor.getValue().pickSubchannel(mock(PickSubchannelArgs.class));
|
||||
assertThat(pickResult).isEqualTo(PickResult.withNoResult());
|
||||
assertThat(pickResult).isEqualTo(result);
|
||||
}
|
||||
|
||||
private Object newChildConfig(LoadBalancerProvider provider, Object config) {
|
||||
|
|
Loading…
Reference in New Issue