mirror of https://github.com/grpc/grpc-java.git
LBs should avoid calling LBs after lb.shutdown()
LoadBalancers shouldn't be called after shutdown(), but RingHashLb could
have enqueued work to the SynchronizationContext that executed after
shutdown(). This commit fixes problems discovered when auditing all LBs
usage of the syncContext for that type of problem.
Similarly, PickFirstLb could have requested a new connection after
shutdown(). We want to avoid that sort of thing too.
RingHashLb's test changed from CONNECTING to TRANSIENT_FAILURE to get
the latest picker. Because two subchannels have failed it will be in
TRANSIENT_FAILURE. Previously the test was using an older picker with
out-of-date subchannelView, and the verifyConnection() was too imprecise
to notice it was creating the wrong subchannel.
As discovered in b/430347751, where ClusterImplLb was seeing a new
subchannel being called after the child LB was shutdown (the shutdown
itself had been caused by RingHashConfig not implementing equals() and
was fixed by a8de9f07ab
, which caused ClusterResolverLb to replace its
state):
```
java.lang.NullPointerException
at io.grpc.xds.ClusterImplLoadBalancer$ClusterImplLbHelper.createClusterLocalityFromAttributes(ClusterImplLoadBalancer.java:322)
at io.grpc.xds.ClusterImplLoadBalancer$ClusterImplLbHelper.createSubchannel(ClusterImplLoadBalancer.java:236)
at io.grpc.util.ForwardingLoadBalancerHelper.createSubchannel(ForwardingLoadBalancerHelper.java:47)
at io.grpc.util.ForwardingLoadBalancerHelper.createSubchannel(ForwardingLoadBalancerHelper.java:47)
at io.grpc.internal.PickFirstLeafLoadBalancer.createNewSubchannel(PickFirstLeafLoadBalancer.java:527)
at io.grpc.internal.PickFirstLeafLoadBalancer.requestConnection(PickFirstLeafLoadBalancer.java:459)
at io.grpc.internal.PickFirstLeafLoadBalancer.acceptResolvedAddresses(PickFirstLeafLoadBalancer.java:174)
at io.grpc.xds.LazyLoadBalancer$LazyDelegate.activate(LazyLoadBalancer.java:64)
at io.grpc.xds.LazyLoadBalancer$LazyDelegate.requestConnection(LazyLoadBalancer.java:97)
at io.grpc.util.ForwardingLoadBalancer.requestConnection(ForwardingLoadBalancer.java:61)
at io.grpc.xds.RingHashLoadBalancer$RingHashPicker.lambda$pickSubchannel$0(RingHashLoadBalancer.java:440)
at io.grpc.SynchronizationContext.drain(SynchronizationContext.java:96)
at io.grpc.SynchronizationContext.execute(SynchronizationContext.java:128)
at io.grpc.xds.client.XdsClientImpl$ResourceSubscriber.onData(XdsClientImpl.java:817)
```
This commit is contained in:
parent
6935d3a115
commit
1fc4ab0bb2
|
@ -1189,6 +1189,10 @@ public abstract class LoadBalancer {
|
|||
* Returns a {@link SynchronizationContext} that runs tasks in the same Synchronization Context
|
||||
* as that the callback methods on the {@link LoadBalancer} interface are run in.
|
||||
*
|
||||
* <p>Work added to the synchronization context might not run immediately, so LB implementations
|
||||
* must be careful to ensure that any assumptions still hold when it is executed. In particular,
|
||||
* the LB might have been shut down or subchannels might have changed state.
|
||||
*
|
||||
* <p>Pro-tip: in order to call {@link SynchronizationContext#schedule}, you need to provide a
|
||||
* {@link ScheduledExecutorService}. {@link #getScheduledExecutorService} is provided for your
|
||||
* convenience.
|
||||
|
|
|
@ -1967,6 +1967,9 @@ final class ManagedChannelImpl extends ManagedChannel implements
|
|||
public void requestConnection() {
|
||||
syncContext.throwIfNotInThisSynchronizationContext();
|
||||
checkState(started, "not started");
|
||||
if (shutdown) {
|
||||
return;
|
||||
}
|
||||
subchannel.obtainActiveTransport();
|
||||
}
|
||||
|
||||
|
|
|
@ -134,7 +134,7 @@ final class PickFirstLoadBalancer extends LoadBalancer {
|
|||
SubchannelPicker picker;
|
||||
switch (newState) {
|
||||
case IDLE:
|
||||
picker = new RequestConnectionPicker(subchannel);
|
||||
picker = new RequestConnectionPicker();
|
||||
break;
|
||||
case CONNECTING:
|
||||
// It's safe to use RequestConnectionPicker here, so when coming from IDLE we could leave
|
||||
|
@ -197,22 +197,12 @@ final class PickFirstLoadBalancer extends LoadBalancer {
|
|||
|
||||
/** Picker that requests connection during the first pick, and returns noResult. */
|
||||
private final class RequestConnectionPicker extends SubchannelPicker {
|
||||
private final Subchannel subchannel;
|
||||
private final AtomicBoolean connectionRequested = new AtomicBoolean(false);
|
||||
|
||||
RequestConnectionPicker(Subchannel subchannel) {
|
||||
this.subchannel = checkNotNull(subchannel, "subchannel");
|
||||
}
|
||||
|
||||
@Override
|
||||
public PickResult pickSubchannel(PickSubchannelArgs args) {
|
||||
if (connectionRequested.compareAndSet(false, true)) {
|
||||
helper.getSynchronizationContext().execute(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
subchannel.requestConnection();
|
||||
}
|
||||
});
|
||||
helper.getSynchronizationContext().execute(PickFirstLoadBalancer.this::requestConnection);
|
||||
}
|
||||
return PickResult.withNoResult();
|
||||
}
|
||||
|
|
|
@ -1767,6 +1767,19 @@ public class ManagedChannelImplTest {
|
|||
any(SocketAddress.class), any(ClientTransportOptions.class), any(ChannelLogger.class));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void subchannelsRequestConnectionNoopAfterShutdown() {
|
||||
createChannel();
|
||||
Subchannel sub1 =
|
||||
createSubchannelSafely(helper, addressGroup, Attributes.EMPTY, subchannelStateListener);
|
||||
|
||||
shutdownSafely(helper, sub1);
|
||||
requestConnectionSafely(helper, sub1);
|
||||
verify(mockTransportFactory, never())
|
||||
.newClientTransport(
|
||||
any(SocketAddress.class), any(ClientTransportOptions.class), any(ChannelLogger.class));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void subchannelsNoConnectionShutdownNow() {
|
||||
createChannel();
|
||||
|
|
|
@ -122,7 +122,7 @@ class ShufflingPickFirstLoadBalancer extends LoadBalancer {
|
|||
SubchannelPicker picker;
|
||||
switch (currentState) {
|
||||
case IDLE:
|
||||
picker = new RequestConnectionPicker(subchannel);
|
||||
picker = new RequestConnectionPicker();
|
||||
break;
|
||||
case CONNECTING:
|
||||
picker = new Picker(PickResult.withNoResult());
|
||||
|
@ -182,22 +182,13 @@ class ShufflingPickFirstLoadBalancer extends LoadBalancer {
|
|||
*/
|
||||
private final class RequestConnectionPicker extends SubchannelPicker {
|
||||
|
||||
private final Subchannel subchannel;
|
||||
private final AtomicBoolean connectionRequested = new AtomicBoolean(false);
|
||||
|
||||
RequestConnectionPicker(Subchannel subchannel) {
|
||||
this.subchannel = checkNotNull(subchannel, "subchannel");
|
||||
}
|
||||
|
||||
@Override
|
||||
public PickResult pickSubchannel(PickSubchannelArgs args) {
|
||||
if (connectionRequested.compareAndSet(false, true)) {
|
||||
helper.getSynchronizationContext().execute(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
subchannel.requestConnection();
|
||||
}
|
||||
});
|
||||
helper.getSynchronizationContext().execute(
|
||||
ShufflingPickFirstLoadBalancer.this::requestConnection);
|
||||
}
|
||||
return PickResult.withNoResult();
|
||||
}
|
||||
|
|
|
@ -99,11 +99,13 @@ final class LazyLoadBalancer extends ForwardingLoadBalancer {
|
|||
|
||||
@Override
|
||||
public void shutdown() {
|
||||
delegate = new NoopLoadBalancer();
|
||||
}
|
||||
|
||||
private final class LazyPicker extends SubchannelPicker {
|
||||
@Override
|
||||
public PickResult pickSubchannel(PickSubchannelArgs args) {
|
||||
// activate() is a no-op after shutdown()
|
||||
helper.getSynchronizationContext().execute(LazyDelegate.this::activate);
|
||||
return PickResult.withNoResult();
|
||||
}
|
||||
|
@ -121,4 +123,17 @@ final class LazyLoadBalancer extends ForwardingLoadBalancer {
|
|||
return new LazyLoadBalancer(helper, delegate);
|
||||
}
|
||||
}
|
||||
|
||||
private static final class NoopLoadBalancer extends LoadBalancer {
|
||||
@Override
|
||||
public Status acceptResolvedAddresses(ResolvedAddresses resolvedAddresses) {
|
||||
return Status.OK;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleNameResolutionError(Status error) {}
|
||||
|
||||
@Override
|
||||
public void shutdown() {}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -438,7 +438,9 @@ final class RingHashLoadBalancer extends MultiChildLoadBalancer {
|
|||
|
||||
if (subchannelView.connectivityState == IDLE) {
|
||||
syncContext.execute(() -> {
|
||||
childLbState.getLb().requestConnection();
|
||||
if (childLbState.getCurrentState() == IDLE) {
|
||||
childLbState.getLb().requestConnection();
|
||||
}
|
||||
});
|
||||
|
||||
return PickResult.withNoResult(); // Indicates that this should be retried after backoff
|
||||
|
@ -456,10 +458,11 @@ final class RingHashLoadBalancer extends MultiChildLoadBalancer {
|
|||
return childLbState.getCurrentPicker().pickSubchannel(args);
|
||||
}
|
||||
if (!requestedConnection && subchannelView.connectivityState == IDLE) {
|
||||
syncContext.execute(
|
||||
() -> {
|
||||
childLbState.getLb().requestConnection();
|
||||
});
|
||||
syncContext.execute(() -> {
|
||||
if (childLbState.getCurrentState() == IDLE) {
|
||||
childLbState.getLb().requestConnection();
|
||||
}
|
||||
});
|
||||
requestedConnection = true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,94 @@
|
|||
/*
|
||||
* Copyright 2025 The gRPC Authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package io.grpc.xds;
|
||||
|
||||
import static com.google.common.truth.Truth.assertThat;
|
||||
|
||||
import io.grpc.CallOptions;
|
||||
import io.grpc.ConnectivityState;
|
||||
import io.grpc.EquivalentAddressGroup;
|
||||
import io.grpc.LoadBalancer;
|
||||
import io.grpc.LoadBalancer.ResolvedAddresses;
|
||||
import io.grpc.LoadBalancer.SubchannelPicker;
|
||||
import io.grpc.ManagedChannel;
|
||||
import io.grpc.Metadata;
|
||||
import io.grpc.SynchronizationContext;
|
||||
import io.grpc.internal.PickSubchannelArgsImpl;
|
||||
import io.grpc.testing.TestMethodDescriptors;
|
||||
import java.util.Arrays;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.JUnit4;
|
||||
|
||||
/** Unit test for {@link io.grpc.xds.LazyLoadBalancer}. */
|
||||
@RunWith(JUnit4.class)
|
||||
public final class LazyLoadBalancerTest {
|
||||
private SynchronizationContext syncContext =
|
||||
new SynchronizationContext((t, e) -> {
|
||||
throw new AssertionError(e);
|
||||
});
|
||||
private LoadBalancer.PickSubchannelArgs args = new PickSubchannelArgsImpl(
|
||||
TestMethodDescriptors.voidMethod(),
|
||||
new Metadata(),
|
||||
CallOptions.DEFAULT,
|
||||
new LoadBalancer.PickDetailsConsumer() {});
|
||||
private FakeHelper helper = new FakeHelper();
|
||||
|
||||
@Test
|
||||
public void pickerIsNoopAfterEarlyShutdown() {
|
||||
LazyLoadBalancer lb = new LazyLoadBalancer(helper, new LoadBalancer.Factory() {
|
||||
@Override
|
||||
public LoadBalancer newLoadBalancer(LoadBalancer.Helper helper) {
|
||||
throw new AssertionError("unexpected");
|
||||
}
|
||||
});
|
||||
lb.acceptResolvedAddresses(ResolvedAddresses.newBuilder()
|
||||
.setAddresses(Arrays.asList())
|
||||
.build());
|
||||
SubchannelPicker picker = helper.picker;
|
||||
assertThat(picker).isNotNull();
|
||||
lb.shutdown();
|
||||
|
||||
picker.pickSubchannel(args);
|
||||
}
|
||||
|
||||
class FakeHelper extends LoadBalancer.Helper {
|
||||
ConnectivityState state;
|
||||
SubchannelPicker picker;
|
||||
|
||||
@Override
|
||||
public ManagedChannel createOobChannel(EquivalentAddressGroup eag, String authority) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void updateBalancingState(ConnectivityState newState, SubchannelPicker newPicker) {
|
||||
this.state = newState;
|
||||
this.picker = newPicker;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SynchronizationContext getSynchronizationContext() {
|
||||
return syncContext;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAuthority() {
|
||||
return "localhost";
|
||||
}
|
||||
}
|
||||
}
|
|
@ -261,7 +261,7 @@ public class RingHashLoadBalancerTest {
|
|||
private void verifyConnection(int times) {
|
||||
for (int i = 0; i < times; i++) {
|
||||
Subchannel connectOnce = connectionRequestedQueue.poll();
|
||||
assertWithMessage("Null connection is at (%s) of (%s)", i, times)
|
||||
assertWithMessage("Expected %s new connections, but found %s", times, i)
|
||||
.that(connectOnce).isNotNull();
|
||||
clearInvocations(connectOnce);
|
||||
}
|
||||
|
@ -648,7 +648,7 @@ public class RingHashLoadBalancerTest {
|
|||
getSubchannel(servers, 2),
|
||||
ConnectivityStateInfo.forTransientFailure(
|
||||
Status.PERMISSION_DENIED.withDescription("permission denied")));
|
||||
verify(helper).updateBalancingState(eq(CONNECTING), pickerCaptor.capture());
|
||||
verify(helper).updateBalancingState(eq(TRANSIENT_FAILURE), pickerCaptor.capture());
|
||||
verifyConnection(0);
|
||||
PickResult result = pickerCaptor.getValue().pickSubchannel(args); // activate last subchannel
|
||||
assertThat(result.getStatus().isOk()).isTrue();
|
||||
|
|
Loading…
Reference in New Issue