Skip to content

Commit

Permalink
Merge pull request #21731 from heejaechang/lowPriorityTimeout
Browse files Browse the repository at this point in the history
Added retry on OOP start up
  • Loading branch information
heejaechang authored Aug 30, 2017
2 parents b735460 + 3009caa commit b1476ba
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 59 deletions.
139 changes: 88 additions & 51 deletions src/VisualStudio/Core/Next/Remote/ServiceHubRemoteHostClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -36,23 +36,14 @@ public static async Task<RemoteHostClient> CreateAsync(
{
using (Logger.LogBlock(FunctionId.ServiceHubRemoteHostClient_CreateAsync, cancellationToken))
{
// let each client to have unique id so that we can distinguish different clients when service is restarted
var currentInstanceId = Interlocked.Add(ref s_instanceId, 1);

var primary = new HubClient("ManagedLanguage.IDE.RemoteHostClient");
var current = $"VS ({Process.GetCurrentProcess().Id}) ({currentInstanceId})";

var hostGroup = new HostGroup(current);
var timeout = TimeSpan.FromMilliseconds(workspace.Options.GetOption(RemoteHostOptions.RequestServiceTimeoutInMS));
var remoteHostStream = await RequestServiceAsync(primary, WellKnownRemoteHostServices.RemoteHostService, hostGroup, timeout, cancellationToken).ConfigureAwait(false);

var instance = new ServiceHubRemoteHostClient(workspace, primary, hostGroup, remoteHostStream);

// make sure connection is done right
var host = await instance._rpc.InvokeAsync<string>(nameof(IRemoteHostService.Connect), current, TelemetryService.DefaultSession.SerializeSettings()).ConfigureAwait(false);

// TODO: change this to non fatal watson and make VS to use inproc implementation
Contract.ThrowIfFalse(host == current.ToString());
// Retry (with timeout) until we can connect to RemoteHost (service hub process).
// we are seeing cases where we failed to connect to service hub process when a machine is under heavy load.
// (see https://devdiv.visualstudio.com/DevDiv/_workitems/edit/481103 for one example)
var instance = await RetryRemoteCallAsync<IOException, ServiceHubRemoteHostClient>(
() => CreateWorkerAsync(workspace, primary, timeout, cancellationToken), timeout, cancellationToken).ConfigureAwait(false);

instance.Connected();

Expand All @@ -65,6 +56,43 @@ public static async Task<RemoteHostClient> CreateAsync(
}
}

/// <summary>
/// Makes one attempt to create a connected <see cref="ServiceHubRemoteHostClient"/>: requests the remote
/// host service stream through <paramref name="primary"/>, wraps it in a client and invokes
/// <c>Connect</c> on the remote side.
/// </summary>
/// <param name="workspace">Workspace handed to the newly created client.</param>
/// <param name="primary">Hub client used to request the remote host service.</param>
/// <param name="timeout">Timeout forwarded to the service request.</param>
/// <param name="cancellationToken">Token observed by the service request and by the connect call.</param>
/// <returns>The client, once the remote <c>Connect</c> call has completed.</returns>
/// <remarks>
/// On any failure the partially created client (if there is one) is shut down. When
/// <paramref name="cancellationToken"/> has been signaled the failure surfaces as our own cancellation;
/// otherwise it is reported to Watson and the original exception is rethrown.
/// </remarks>
public static async Task<ServiceHubRemoteHostClient> CreateWorkerAsync(Workspace workspace, HubClient primary, TimeSpan timeout, CancellationToken cancellationToken)
{
    ServiceHubRemoteHostClient remoteClient = null;
    try
    {
        // every attempt gets its own id so that different clients can be told apart when the service is restarted
        var instanceId = Interlocked.Add(ref s_instanceId, 1);
        var hostName = $"VS ({Process.GetCurrentProcess().Id}) ({instanceId})";
        var group = new HostGroup(hostName);

        var stream = await RequestServiceAsync(primary, WellKnownRemoteHostServices.RemoteHostService, group, timeout, cancellationToken).ConfigureAwait(false);

        remoteClient = new ServiceHubRemoteHostClient(workspace, primary, group, stream);

        // arguments of the remote Connect call: our host name and the serialized telemetry session
        var connectArguments = new object[] { hostName, TelemetryService.DefaultSession.SerializeSettings() };
        await remoteClient._rpc.InvokeWithCancellationAsync<string>(nameof(IRemoteHostService.Connect), connectArguments, cancellationToken).ConfigureAwait(false);

        return remoteClient;
    }
    catch (Exception failure)
    {
        // initialization failed; do not leave a half-initialized client running.
        if (remoteClient != null)
        {
            remoteClient.Shutdown();
        }

        // if cancellation was requested, surface the failure as our own cancellation.
        cancellationToken.ThrowIfCancellationRequested();

        // a genuine failure: report watson and let the original exception propagate
        WatsonReporter.Report("ServiceHub creation failed", failure, ReportDetailInfo);
        throw;
    }
}

private static async Task RegisterWorkspaceHostAsync(Workspace workspace, RemoteHostClient client)
{
var vsWorkspace = workspace as VisualStudioWorkspaceImpl;
Expand All @@ -88,7 +116,7 @@ await Task.Factory.SafeStartNew(() =>

private ServiceHubRemoteHostClient(
Workspace workspace, HubClient hubClient, HostGroup hostGroup, Stream stream) :
base(workspace)
base(workspace)
{
_hubClient = hubClient;
_hostGroup = hostGroup;
Expand Down Expand Up @@ -136,6 +164,40 @@ private void OnRpcDisconnected(object sender, JsonRpcDisconnectedEventArgs e)
Disconnected();
}

/// <summary>
/// call <paramref name="funcAsync"/> and retry up to <paramref name="timeout"/> if the call throws
/// <typeparamref name="TException"/>. any other exception from the call won't be handled here.
/// </summary>
/// <typeparam name="TException">exception type that triggers a retry.</typeparam>
/// <typeparam name="TResult">result type of <paramref name="funcAsync"/>.</typeparam>
/// <param name="funcAsync">operation to invoke; it is invoked again after each <typeparamref name="TException"/>.</param>
/// <param name="timeout">total time we are willing to keep retrying. no attempt is started once it has elapsed.</param>
/// <param name="cancellationToken">checked before every attempt, after every caught failure and while waiting between attempts.</param>
/// <returns>the result of the first attempt that completes successfully.</returns>
/// <exception cref="OperationCanceledException"><paramref name="cancellationToken"/> was signaled.</exception>
/// <exception cref="TimeoutException"><paramref name="timeout"/> elapsed without a successful attempt.</exception>
private static async Task<TResult> RetryRemoteCallAsync<TException, TResult>(
    Func<Task<TResult>> funcAsync,
    TimeSpan timeout,
    CancellationToken cancellationToken) where TException : Exception
{
    const int RetryDelayInMS = 50;

    // measure elapsed time with a monotonic clock. DateTime.UtcNow follows the system clock, so a
    // clock adjustment in the middle of the loop could shorten or extend the retry window.
    var stopwatch = Stopwatch.StartNew();
    while (stopwatch.Elapsed < timeout)
    {
        cancellationToken.ThrowIfCancellationRequested();

        try
        {
            return await funcAsync().ConfigureAwait(false);
        }
        catch (TException)
        {
            // throw cancellation token if operation is cancelled, otherwise fall through and retry
            cancellationToken.ThrowIfCancellationRequested();
        }

        // wait for RetryDelayInMS before next try
        await Task.Delay(RetryDelayInMS, cancellationToken).ConfigureAwait(false);
    }

    // operation timed out, more than we are willing to wait
    throw new TimeoutException("RequestServiceAsync timed out");
}

private static async Task<Stream> RequestServiceAsync(
HubClient client,
string serviceName,
Expand All @@ -156,7 +218,17 @@ private static async Task<Stream> RequestServiceAsync(
{
try
{
return await RequestServiceAsync(client, descriptor, timeout, cancellationToken).ConfigureAwait(false);
// we are wrapping HubClient.RequestServiceAsync since we can't control its internal timeout value ourselves.
// we have bug opened to track the issue.
// https://devdiv.visualstudio.com/DefaultCollection/DevDiv/Editor/_workitems?id=378757&fullScreen=false&_a=edit

// retry on cancellation token since HubClient will throw its own cancellation token
// when it couldn't connect to service hub service for some reasons
// (ex, OOP process GC blocked and not responding to request)
return await RetryRemoteCallAsync<OperationCanceledException, Stream>(
() => client.RequestServiceAsync(descriptor, cancellationToken),
timeout,
cancellationToken).ConfigureAwait(false);
}
catch (RemoteInvocationException ex)
{
Expand Down Expand Up @@ -184,41 +256,6 @@ private static async Task<Stream> RequestServiceAsync(
throw ExceptionUtilities.Unreachable;
}

/// <summary>
/// requests <paramref name="descriptor"/> from <paramref name="client"/>, retrying for up to
/// <paramref name="timeout"/> whenever the request throws <see cref="OperationCanceledException"/>
/// that was not caused by <paramref name="cancellationToken"/>.
/// </summary>
/// <param name="client">hub client the service is requested from.</param>
/// <param name="descriptor">descriptor of the service to request.</param>
/// <param name="timeout">total time we are willing to keep retrying.</param>
/// <param name="cancellationToken">our own cancellation token; when it is signaled the cancellation is rethrown instead of retried.</param>
/// <returns>the stream returned by the first request that succeeds.</returns>
/// <exception cref="TimeoutException"><paramref name="timeout"/> elapsed without a successful request.</exception>
private static async Task<Stream> RequestServiceAsync(HubClient client, ServiceDescriptor descriptor, TimeSpan timeout, CancellationToken cancellationToken = default(CancellationToken))
{
    // we are wrapping HubClient.RequestServiceAsync since we can't control its internal timeout value ourselves.
    // we have bug opened to track the issue.
    // https://devdiv.visualstudio.com/DefaultCollection/DevDiv/Editor/_workitems?id=378757&fullScreen=false&_a=edit
    const int RetryDelayInMS = 50;

    // measure elapsed time with a monotonic stopwatch. the previous condition computed
    // "start - now", which is never positive, so the loop could never reach the timeout below.
    var stopwatch = Stopwatch.StartNew();
    while (stopwatch.Elapsed < timeout)
    {
        cancellationToken.ThrowIfCancellationRequested();

        try
        {
            return await client.RequestServiceAsync(descriptor, cancellationToken).ConfigureAwait(false);
        }
        catch (OperationCanceledException)
        {
            // if it is our own cancellation token, then rethrow
            // otherwise, let us retry.
            //
            // we do this since HubClient itself can throw its own cancellation token
            // when it couldn't connect to service hub service for some reasons
            // (ex, OOP process GC blocked and not responding to request)
            cancellationToken.ThrowIfCancellationRequested();
        }

        // wait for RetryDelayInMS before next try
        await Task.Delay(RetryDelayInMS, cancellationToken).ConfigureAwait(false);
    }

    // request service to HubClient timed out, more than we are willing to wait
    throw new TimeoutException("RequestServiceAsync timed out");
}

private static int ReportDetailInfo(IFaultUtility faultUtility)
{
// 0 means send watson, otherwise, cancel watson
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public void TestRemoteHostConnect()
var remoteHostService = CreateService();

var input = "Test";
var output = remoteHostService.Connect(input, serializedSession: null);
var output = remoteHostService.Connect(input, serializedSession: null, cancellationToken: CancellationToken.None);

Assert.Equal(input, output);
}
Expand Down
3 changes: 2 additions & 1 deletion src/Workspaces/Core/Portable/Remote/IRemoteHostService.cs
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System.Threading;
using System.Threading.Tasks;

namespace Microsoft.CodeAnalysis.Remote
{
internal interface IRemoteHostService
{
string Connect(string host, string serializedSession);
string Connect(string host, string serializedSession, CancellationToken cancellationToken);
Task SynchronizePrimaryWorkspaceAsync(Checksum checksum);
Task SynchronizeGlobalAssetsAsync(Checksum[] checksums);

Expand Down
19 changes: 13 additions & 6 deletions src/Workspaces/Remote/ServiceHub/Services/RemoteHostService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,6 @@ static RemoteHostService()
// we set up logger here
RoslynLogger.SetLogger(new EtwLogger(GetLoggingChecker()));

// Set this process's priority BelowNormal.
// this should let us to freely try to use all resources possible without worrying about affecting
// host's work such as responsiveness or build.
Process.GetCurrentProcess().PriorityClass = ProcessPriorityClass.BelowNormal;

SetNativeDllSearchDirectories();
}

Expand All @@ -56,8 +51,11 @@ public RemoteHostService(Stream stream, IServiceProvider serviceProvider) :
Rpc.StartListening();
}

public string Connect(string host, string serializedSession)
public string Connect(string host, string serializedSession, CancellationToken cancellationToken)
{
cancellationToken.ThrowIfCancellationRequested();

// this is called only once when Host (VS) started RemoteHost (OOP)
_primaryInstance = InstanceId;

var existing = Interlocked.CompareExchange(ref _host, host, null);
Expand All @@ -72,6 +70,15 @@ public string Connect(string host, string serializedSession)
// log telemetry that service hub started
RoslynLogger.Log(FunctionId.RemoteHost_Connect, KeyValueLogMessage.Create(SetSessionInfo));

// serializedSession will be null for testing
if (serializedSession != null)
{
// Set this process's priority to BelowNormal.
// this lets us freely use all available resources without worrying about affecting
// the host's work, such as responsiveness or builds.
Process.GetCurrentProcess().PriorityClass = ProcessPriorityClass.BelowNormal;
}

return _host;
}

Expand Down

0 comments on commit b1476ba

Please sign in to comment.