Performance Benchmarking of Contains, Exists and Any

I have been searching for a performance benchmarking between Contains, Exists and Any methods available in the List<T>. I wanted to find this out just out of curiosity as I was always confused among these. Many questions on SO described definitions of these methods such as:

  1. LINQ Ring: Any() vs Contains() for Huge Collections
  2. Linq .Any VS .Exists - Whats the difference?
  3. LINQ extension methods - Any() vs. Where() vs. Exists()

So I decided to do it myself. I am adding it as an answer. Any more insight on the results is most welcomed. I also did this benchmarking for arrays to see the results

79849 次浏览

According to documentation:

List.Exists (Object method)

Determines whether the List(T) contains elements that match the conditions defined by the specified predicate.

IEnumerable.Any (Extension method)

Determines whether any element of a sequence satisfies a condition.

List.Contains (Object Method)

Determines whether an element is in the List.

Benchmarking:

CODE:

    static void Main(string[] args)
{
ContainsExistsAnyShort();


ContainsExistsAny();
}
    

private static void ContainsExistsAny()
{
Console.WriteLine("***************************************");
Console.WriteLine("********* ContainsExistsAny ***********");
Console.WriteLine("***************************************");


List<int> list = new List<int>(6000000);
Random random = new Random();
for (int i = 0; i < 6000000; i++)
{
list.Add(random.Next(6000000));
}
int[] arr = list.ToArray();


find(list, arr);
}


private static void ContainsExistsAnyShort()
{
Console.WriteLine("***************************************");
Console.WriteLine("***** ContainsExistsAnyShortRange *****");
Console.WriteLine("***************************************");


List<int> list = new List<int>(2000);
Random random = new Random();
for (int i = 0; i < 2000; i++)
{
list.Add(random.Next(6000000));
}
int[] arr = list.ToArray();


find(list, arr);
}


private static void find(List<int> list, int[] arr)
{
Random random = new Random();
int[] find = new int[10000];
for (int i = 0; i < 10000; i++)
{
find[i] = random.Next(6000000);
}


Stopwatch watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
list.Contains(find[rpt]);
}
watch.Stop();
Console.WriteLine("List/Contains: {0:N0}ms", watch.ElapsedMilliseconds);


watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
list.Exists(a => a == find[rpt]);
}
watch.Stop();
Console.WriteLine("List/Exists: {0:N0}ms", watch.ElapsedMilliseconds);


watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
list.Any(a => a == find[rpt]);
}
watch.Stop();
Console.WriteLine("List/Any: {0:N0}ms", watch.ElapsedMilliseconds);


watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
arr.Contains(find[rpt]);
}
watch.Stop();
Console.WriteLine("Array/Contains: {0:N0}ms", watch.ElapsedMilliseconds);


Console.WriteLine("Arrays do not have Exists");


watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
arr.Any(a => a == find[rpt]);
}
watch.Stop();
Console.WriteLine("Array/Any: {0:N0}ms", watch.ElapsedMilliseconds);
}

RESULTS

***************************************
***** ContainsExistsAnyShortRange *****
***************************************
List/Contains: 96ms
List/Exists: 146ms
List/Any: 381ms
Array/Contains: 34ms
Arrays do not have Exists
Array/Any: 410ms
***************************************
********* ContainsExistsAny ***********
***************************************
List/Contains: 257,996ms
List/Exists: 379,951ms
List/Any: 884,853ms
Array/Contains: 72,486ms
Arrays do not have Exists
Array/Any: 1,013,303ms

The fastest way is to use a HashSet. The Contains for a HashSet is O(1).

I took your code and added a benchmark for HashSet<int> The performance cost of HashSet<int> set = new HashSet<int>(list); is nearly zero.

Code

void Main()
{
ContainsExistsAnyVeryShort();
    

ContainsExistsAnyShort();


ContainsExistsAny();
}


private static void ContainsExistsAny()
{
Console.WriteLine("***************************************");
Console.WriteLine("********* ContainsExistsAny ***********");
Console.WriteLine("***************************************");


List<int> list = new List<int>(6000000);
Random random = new Random();
for (int i = 0; i < 6000000; i++)
{
list.Add(random.Next(6000000));
}
int[] arr = list.ToArray();
HashSet<int> set = new HashSet<int>(list);


find(list, arr, set, (method, stopwatch) => $"{method}: {stopwatch.ElapsedMilliseconds}ms");


}


private static void ContainsExistsAnyShort()
{
Console.WriteLine("***************************************");
Console.WriteLine("***** ContainsExistsAnyShortRange *****");
Console.WriteLine("***************************************");


List<int> list = new List<int>(2000);
Random random = new Random();
for (int i = 0; i < 2000; i++)
{
list.Add(random.Next(6000000));
}
int[] arr = list.ToArray();
HashSet<int> set = new HashSet<int>(list);


find(list, arr, set, (method, stopwatch) => $"{method}: {stopwatch.ElapsedMilliseconds}ms");


}


private static void ContainsExistsAnyVeryShort()
{
Console.WriteLine("*******************************************");
Console.WriteLine("***** ContainsExistsAnyVeryShortRange *****");
Console.WriteLine("*******************************************");


List<int> list = new List<int>(10);
Random random = new Random();
for (int i = 0; i < 10; i++)
{
list.Add(random.Next(6000000));
}
int[] arr = list.ToArray();
HashSet<int> set = new HashSet<int>(list);


find(list, arr, set, (method, stopwatch) => $"{method}: {stopwatch.ElapsedTicks} ticks");


}


private static void find(List<int> list, int[] arr, HashSet<int> set, Func<string, Stopwatch, string> format)
{
Random random = new Random();
int[] find = new int[10000];
for (int i = 0; i < 10000; i++)
{
find[i] = random.Next(6000000);
}


Stopwatch watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
list.Contains(find[rpt]);
}
watch.Stop();
Console.WriteLine(format("List/Contains", watch));


watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
list.Exists(a => a == find[rpt]);
}
watch.Stop();
Console.WriteLine(format("List/Exists", watch));


watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
list.Any(a => a == find[rpt]);
}
watch.Stop();
Console.WriteLine(format("List/Any", watch));


watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
arr.Contains(find[rpt]);
}
watch.Stop();
Console.WriteLine(format("Array/Contains", watch));


watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
Array.Exists(arr, a => a == find[rpt]);
}
watch.Stop();
Console.WriteLine(format("Array/Exists", watch));


watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
Array.IndexOf(arr, find[rpt]);
}
watch.Stop();
Console.WriteLine(format("Array/IndexOf", watch));


watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
arr.Any(a => a == find[rpt]);
}
watch.Stop();
Console.WriteLine(format("Array/Any", watch));


watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
set.Contains(find[rpt]);
}
watch.Stop();
Console.WriteLine(format("HashSet/Contains", watch));
}

RESULTS

*******************************************
***** ContainsExistsAnyVeryShortRange *****
*******************************************
List/Contains: 1067 ticks
List/Exists: 2884 ticks
List/Any: 10520 ticks
Array/Contains: 1880 ticks
Array/Exists: 5526 ticks
Array/IndexOf: 601 ticks
Array/Any: 13295 ticks
HashSet/Contains: 6629 ticks
***************************************
***** ContainsExistsAnyShortRange *****
***************************************
List/Contains: 4ms
List/Exists: 28ms
List/Any: 138ms
Array/Contains: 6ms
Array/Exists: 34ms
Array/IndexOf: 3ms
Array/Any: 96ms
HashSet/Contains: 0ms
***************************************
********* ContainsExistsAny ***********
***************************************
List/Contains: 11504ms
List/Exists: 57084ms
List/Any: 257659ms
Array/Contains: 11643ms
Array/Exists: 52477ms
Array/IndexOf: 11741ms
Array/Any: 194184ms
HashSet/Contains: 3ms

Edit (2021-08-25)

I added a comparison for very short collections (10 items) and also added Array.Contains and Array.IndexOf. You can see that Array.IndexOf is the fastest for such small ranges. That is, because as @lucky-brian said n is so small here, that a for-loop performs better than a somewhat complex search algorithm. However I still advice to use HashSet<T> whenever possible, as it better reflects the intend of only having unique values and the difference for small collections is below 1ms.

It is worth mentioning that this comparison is a bit unfair, since the Array class doesn't own the Contains() method. It uses an extension method for IEnumerable<T> via a sequential Enumerator, hence it is not optimized for Array instances. On the other side, HashSet<T> has its own implementation fully optimized for all sizes.

To compare fairly you could use the static method int Array.IndexOf() which is implemented for Array instances, even though it uses a for loop slightly more efficient that an Enumerator.

Using a fair comparison algorithm, the performance for small sets of up to 5 elements of HashSet<T>.Contains() is similar to the Array.IndexOf() but it is much more efficient for larger sets.