Executing Git Commands from .NET using a Process

I recently needed some metadata from Git repositories, such as the list of indexed files. I initially tried to use LibGit2Sharp, but it often failed to return file information for things like merge commits.

So I just executed the git executable from .NET using a Process, and called it a GitExecutor for lack of a better name. Maybe it is useful for someone else attempting to do the same, so I am sharing it here.

Table of contents

Executing Git Commands: The GitExecutor

We start by defining a GitException, which is thrown when a git command fails:

// Licensed under the MIT license. See LICENSE file in the project root for full license information.

namespace ElasticsearchCodeSearch.Indexer.Git.Exceptions
{
    /// <summary>
    /// Thrown when a git command finishes with a non-zero exit code.
    /// </summary>
    public class GitException : Exception
    {
        /// <summary>
        /// Exit code reported by the git process.
        /// </summary>
        public readonly int ExitCode;

        /// <summary>
        /// Text the git process wrote to standard error.
        /// </summary>
        public readonly string Errors;

        /// <summary>
        /// Creates the exception and fills <see cref="Exception.Message"/> with the exit code
        /// and the captured error output, so the failure is visible in logs and stack traces
        /// without inspecting the fields.
        /// </summary>
        /// <param name="exitCode">Exit code reported by the git process.</param>
        /// <param name="errors">Text the git process wrote to standard error.</param>
        public GitException(int exitCode, string errors)
            : base(BuildMessage(exitCode, errors))
        {
            ExitCode = exitCode;
            Errors = errors;
        }

        // Builds the human-readable message; the stderr part is omitted when nothing was captured.
        private static string BuildMessage(int exitCode, string errors)
        {
            if (string.IsNullOrWhiteSpace(errors))
            {
                return $"git exited with code {exitCode}.";
            }

            return $"git exited with code {exitCode}: {errors}";
        }
    }
}

And then we can use the System.Diagnostics.Process class to execute the git executable and return the results:

// Licensed under the MIT license. See LICENSE file in the project root for full license information.

using ElasticsearchCodeSearch.Indexer.Git.Exceptions;
using ElasticsearchCodeSearch.Shared.Logging;
using Microsoft.Extensions.Logging;
using System.Diagnostics;
using System.Text;

namespace ElasticsearchCodeSearch.Indexer.Git
{
    /// <summary>
    /// Runs the <c>git</c> executable as a child process and returns what it wrote to standard output.
    /// </summary>
    public class GitExecutor
    {
        // The captured output is assembled with Environment.NewLine, which differs per platform,
        // so lines are split on either character instead of a hard-coded "\r\n".
        private static readonly char[] LineSeparators = { '\r', '\n' };

        private readonly ILogger<GitExecutor> _logger;

        public GitExecutor(ILogger<GitExecutor> logger)
        {
            _logger = logger;
        }

        /// <summary>
        /// Clones a repository.
        /// </summary>
        /// <param name="repository_url">URL (or path) of the repository to clone.</param>
        /// <param name="repository_directory">Target directory; when empty, git chooses the directory name.</param>
        /// <param name="cancellationToken">Cancels the operation and terminates the git process.</param>
        /// <exception cref="GitException">git exited with a non-zero exit code.</exception>
        public async Task Clone(string repository_url, string repository_directory, CancellationToken cancellationToken)
        {
            // "--" ends option parsing, so a URL or directory starting with '-' is never read as a git option.
            var arguments = string.IsNullOrWhiteSpace(repository_directory)
                ? new[] { "clone", "--", repository_url }
                : new[] { "clone", "--", repository_url, repository_directory };

            await RunWithArgumentListAsync(arguments, string.Empty, cancellationToken);
        }

        /// <summary>
        /// Returns the object name (second column of <c>git ls-files -s</c>) for the given path.
        /// </summary>
        /// <param name="repository_directory">Working directory of the repository.</param>
        /// <param name="path">Path of the file inside the repository.</param>
        /// <param name="cancellationToken">Cancels the operation and terminates the git process.</param>
        /// <exception cref="GitException">git exited with a non-zero exit code.</exception>
        /// <exception cref="InvalidOperationException">git printed no index entry for the path.</exception>
        public async Task<string> SHA1(string repository_directory, string path, CancellationToken cancellationToken)
        {
            var result = await RunWithArgumentListAsync(new[] { "ls-files", "-s", "--", path }, repository_directory, cancellationToken);

            // Output line format: "<mode> <object> <stage>\t<file>"; only the second field is needed.
            var fields = result.Split(' ', 3);

            if (fields.Length < 2)
            {
                throw new InvalidOperationException($"git ls-files returned no index entry for '{path}'.");
            }

            return fields[1];
        }

        /// <summary>
        /// Returns the hash of the most recent commit that touched the given path.
        /// </summary>
        /// <param name="repository_directory">Working directory of the repository.</param>
        /// <param name="path">Path of the file inside the repository.</param>
        /// <param name="cancellationToken">Cancels the operation and terminates the git process.</param>
        /// <exception cref="GitException">git exited with a non-zero exit code.</exception>
        public async Task<string> CommitHash(string repository_directory, string path, CancellationToken cancellationToken)
        {
            return await RunWithArgumentListAsync(new[] { "log", "--pretty=format:%H", "-n", "1", "--", path }, repository_directory, cancellationToken);
        }

        /// <summary>
        /// Returns the author date of the most recent commit that touched the given path,
        /// or <c>default(DateTime)</c> when git's output cannot be parsed as a date.
        /// </summary>
        /// <param name="repository_directory">Working directory of the repository.</param>
        /// <param name="path">Path of the file inside the repository.</param>
        /// <param name="cancellationToken">Cancels the operation and terminates the git process.</param>
        /// <exception cref="GitException">git exited with a non-zero exit code.</exception>
        public async Task<DateTime> LatestCommitDate(string repository_directory, string path, CancellationToken cancellationToken)
        {
            var result = await RunWithArgumentListAsync(new[] { "log", "-1", "--date=iso-strict", "--format=%ad", "--", path }, repository_directory, cancellationToken);

            // The date is machine-readable ISO 8601, so parse it independently of the current culture.
            var parsed = DateTime.TryParse(
                result,
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.None,
                out var date);

            return parsed ? date : default;
        }

        /// <summary>
        /// Lists the files git reports for the repository (<c>git ls-files</c>), one entry per output line.
        /// </summary>
        /// <param name="repository_directory">Working directory of the repository.</param>
        /// <param name="cancellationToken">Cancels the operation and terminates the git process.</param>
        /// <returns>The listed paths; an empty array when git prints nothing.</returns>
        /// <exception cref="GitException">git exited with a non-zero exit code.</exception>
        public async Task<string[]> ListFiles(string repository_directory, CancellationToken cancellationToken)
        {
            var result = await RunWithArgumentListAsync(new[] { "ls-files" }, repository_directory, cancellationToken);

            return result.Split(LineSeparators, StringSplitOptions.RemoveEmptyEntries);
        }

        /// <summary>
        /// Runs git with a raw, pre-formatted argument string and returns its trimmed standard output.
        /// </summary>
        /// <param name="arguments">Command line passed to git as-is; the caller is responsible for quoting.</param>
        /// <param name="workingDirectory">Directory to run git in.</param>
        /// <param name="cancellationToken">Cancels the operation and terminates the git process.</param>
        /// <exception cref="GitException">git exited with a non-zero exit code.</exception>
        public async Task<string> RunAsync(string arguments, string workingDirectory, CancellationToken cancellationToken)
        {
            var startInfo = CreateStartInfo("git", workingDirectory);

            startInfo.Arguments = arguments;

            return await ExecuteAsync(startInfo, cancellationToken);
        }

        // Runs git with individually passed arguments, so values containing spaces or quotes
        // reach git unchanged without any manual escaping.
        private async Task<string> RunWithArgumentListAsync(string[] arguments, string workingDirectory, CancellationToken cancellationToken)
        {
            var startInfo = CreateStartInfo("git", workingDirectory);

            foreach (var argument in arguments)
            {
                startInfo.ArgumentList.Add(argument);
            }

            return await ExecuteAsync(startInfo, cancellationToken);
        }

        // Runs the process and converts a non-zero exit code into a GitException.
        private async Task<string> ExecuteAsync(ProcessStartInfo startInfo, CancellationToken cancellationToken)
        {
            var result = await RunProcessAsync(startInfo, cancellationToken);

            if (result.ExitCode != 0)
            {
                throw new GitException(result.ExitCode, result.Errors);
            }

            return result.Output;
        }

        // Creates start info with both output streams redirected and no shell or window involved.
        private static ProcessStartInfo CreateStartInfo(string application, string workingDirectory)
        {
            return new ProcessStartInfo
            {
                CreateNoWindow = true,
                UseShellExecute = false,
                RedirectStandardError = true,
                RedirectStandardOutput = true,
                FileName = application,
                WorkingDirectory = workingDirectory,
            };
        }

        // Starts the process, collects stdout and stderr line by line and waits for it to exit.
        private async Task<(int ExitCode, string Output, string Errors)> RunProcessAsync(ProcessStartInfo startInfo, CancellationToken cancellationToken)
        {
            using (var process = new Process { StartInfo = startInfo })
            {
                var outputBuilder = new StringBuilder();
                var errorsBuilder = new StringBuilder();

                // A null Data value marks the end of the stream and carries no content.
                process.OutputDataReceived += (_, args) =>
                {
                    if (args.Data is not null)
                    {
                        outputBuilder.AppendLine(args.Data);
                    }
                };

                process.ErrorDataReceived += (_, args) =>
                {
                    if (args.Data is not null)
                    {
                        errorsBuilder.AppendLine(args.Data);
                    }
                };

                process.Start();

                process.BeginOutputReadLine();
                process.BeginErrorReadLine();

                try
                {
                    await process.WaitForExitAsync(cancellationToken);
                }
                catch (OperationCanceledException)
                {
                    // Cancelling the wait does not stop git; terminate it so it is not left running.
                    TryKill(process);

                    throw;
                }

                var exitCode = process.ExitCode;
                var output = outputBuilder.ToString().Trim();
                var errors = errorsBuilder.ToString().Trim();

                return (exitCode, output, errors);
            }
        }

        // Best-effort termination: a failure to kill must not replace the cancellation exception.
        private void TryKill(Process process)
        {
            try
            {
                if (!process.HasExited)
                {
                    process.Kill(entireProcessTree: true);
                }
            }
            catch (Exception e)
            {
                _logger.LogWarning(e, "Failed to terminate the git process after cancellation");
            }
        }
    }
}

In the application, I start by registering it as a singleton in the Startup:

// Register GitExecutor with the service collection as a singleton.
builder.Services.AddSingleton<GitExecutor>();

And then use it like this:

// Licensed under the MIT license. See LICENSE file in the project root for full license information.

// ...

namespace ElasticsearchCodeSearch.Indexer.Services
{
    /// <summary>
    /// Git Indexer.
    /// </summary>
    public class GitIndexerService
    {
        private readonly GitExecutor _git;

        public GitIndexerService(GitExecutor git)
        {
            _git = git;
        }

        /// <summary>
        /// Clones the repository, lists its files, keeps the allowed ones and indexes them in
        /// batches, running several batches in parallel. Failures are logged and rethrown.
        /// </summary>
        /// <param name="repositoryMetadata">Metadata of the repository to index; supplies the clone URL and the name used in log messages.</param>
        /// <param name="cancellationToken">Token passed on to the git calls and to the parallel indexing loop.</param>
        public async ValueTask IndexRepositoryAsync(RepositoryMetadataDto repositoryMetadata, CancellationToken cancellationToken)
        {
            _logger.TraceMethodEntry();

            try
            {
                // ...

                await _git
                    .Clone(repositoryMetadata.CloneUrl, workingDirectory, cancellationToken)
                    .ConfigureAwait(false);

                // Get the list of allowed files, by matching against allowed extensions (.c, .cpp, ...)
                // and allowed filenames (.gitignore, README, ...). We don't want to parse binary data.
                var files = await _git
                    .ListFiles(workingDirectory, cancellationToken)
                    .ConfigureAwait(false);

                var batches = files
                    .Where(filename => IsAllowedFile(filename, allowedExtensions, allowedFilenames))
                    .Chunk(_options.BatchSize);

                var parallelOptions = new ParallelOptions()
                {
                    MaxDegreeOfParallelism = _options.MaxParallelBulkRequests,
                    CancellationToken = cancellationToken
                };

                // The lambda parameters use their own names so they do not shadow the method's cancellationToken.
                await Parallel
                    .ForEachAsync(source: batches, parallelOptions: parallelOptions, body: (batch, batchCancellationToken) => IndexDocumentsAsync(repositoryMetadata, batch, batchCancellationToken))
                    .ConfigureAwait(false);
            }
            catch (Exception e)
            {
                _logger.LogError(e, "Indexing Repository '{Repository}' failed", repositoryMetadata.FullName);

                throw;
            }
            finally
            {
                // ...
            }
        }

        // ...

    }
}

How to contribute

One of the easiest ways to contribute is to participate in discussions. You can also contribute by submitting pull requests.

General feedback and discussions?

Do you have questions or feedback on this article? Please create an issue on the repository's issue tracker.

Something is wrong or missing?

There may be something wrong or missing in this article. If you want to help fix it, please open a pull request for this file.