Friday 24 June 2016

C# In Memory Search Performance Comparison

Just in case one ever wants to do really fast in memory searches, it's useful to know which classes are the fastest, so I did some tests.  Here are the results from searching for a million integers in an array of a million integers.

Warming up...

Single threaded search...

Testing HashSet...
Create (and sort) took 209ms
Search took 208ms
Total time 417ms

Testing Dictionary...
Create (and sort) took 375ms
Search took 117ms
Total time 492ms

Testing BinarySearch...
Create (and sort) took 187ms
Search took 554ms
Total time 741ms

Testing SortedList...
Create (and sort) took 419ms
Search took 543ms
Total time 962ms

Testing HashTable...
Create (and sort) took 1180ms
Search took 530ms
Total time 1710ms

Multithreaded search...

Testing HashSet...
Create (and sort) took 114ms
Search took 65ms
Total time 179ms

Testing Dictionary...
Create (and sort) took 218ms
Search took 55ms
Total time 273ms

Testing BinarySearch...
Create (and sort) took 186ms
Search took 168ms
Total time 354ms

Testing SortedList...
Create (and sort) took 431ms
Search took 171ms
Total time 602ms

Testing HashTable...
Create (and sort) took 972ms
Search took 343ms
Total time 1315ms


And here's the source code I used to get these stats:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Diagnostics;
using System.Threading.Tasks;
using System.Collections;

namespace PerformanceTesting
{
    class Program
    {
        static void Main(string[] args)
        {
            Random rnd = new Random();
            var warmUp = Enumerable.Range(0, 1000000)
                .Select(i => rnd.Next())
                .Distinct()
                .Take(100000)
                .ToArray();
            var randomIntegersToStore = Enumerable
                .Range(0, 10000000)
                .Select(i => rnd.Next())
                .Distinct()
                .Take(1000000)
                .ToArray();
            if (randomIntegersToStore.Count() != 1000000)
                throw new NotSupportedException("Incorrect number of integers");
            var randomIntegersToSearchFor = Enumerable.Range(0, 1000000)
                .Select(i => rnd.Next())
                .ToArray();
            var tests = new List<ITest>
            {
                new HashSetTest(),
                new DictionaryTest(),
                new BinarySearchTest(),
                new SortedListTest(),
                new HashTableTest()
            };
            Console.WriteLine("Warming up...");
            Console.WriteLine();
            foreach(var test in tests)
            {
                test.Initialize(warmUp);
                test.Search(warmUp, false);
            }
            Console.WriteLine("Single threaded search...");
            Console.WriteLine();
            RunTests(tests, warmUp, randomIntegersToStore, randomIntegersToSearchFor, false);
            Console.WriteLine("Multithreaded search...");
            Console.WriteLine();
            RunTests(tests, warmUp, randomIntegersToStore, randomIntegersToSearchFor, true); Console.ReadLine();
        }

        private static void RunTests(IEnumerable<ITest> tests,
            int[] warmUp, int[] randomIntegersToStore, int[] randomIntegersToSearchFor,
            bool multithreadedSearch)
        {
            foreach (var test in tests)
            {
                test.Initialize(warmUp);
                Console.WriteLine("Testing " + test.GetType().Name.Replace("Test", "") + "...");
                var stopwatch = Stopwatch.StartNew();
                test.Initialize(randomIntegersToStore);
                stopwatch.Stop();
                var createTime = stopwatch.ElapsedMilliseconds;
                Console.WriteLine("Create (and sort) took " + createTime + "ms");
                test.Search(warmUp, multithreadedSearch);
                stopwatch.Restart();
                test.Search(randomIntegersToSearchFor, multithreadedSearch);
                stopwatch.Stop();
                var searchTime = stopwatch.ElapsedMilliseconds;
                Console.WriteLine("Search took " + searchTime + "ms");
                Console.WriteLine("Total time " + (createTime + searchTime).ToString() + "ms");
                Console.WriteLine();
            }
        }

        interface ITest
        {
            void Initialize(int[] randomIntegers);
            void Search(int[] randomIntegers, bool multiThreaded);
        }

        class HashSetTest : ITest
        {
            private HashSet<int> hashSet;
            public void Initialize(int[] randomIntegers)
            {
                hashSet = new HashSet<int>(randomIntegers);
            }

            public void Search(int[] randomIntegers, bool multiThreaded)
            {
                if (multiThreaded)
                    Parallel.ForEach(randomIntegers, i => hashSet.Contains(i));
                else
                    foreach (var i in randomIntegers)
                    {
                        hashSet.Contains(i);
                    }
            }
        }

        class BinarySearchTest : ITest
        {
            private List<int> list;
            public void Initialize(int[] randomIntegers)
            {
                list = new List<int>(randomIntegers);
                list.Sort();
            }

            public void Search(int[] randomIntegers, bool multiThreaded)
            {
                if (multiThreaded)
                    Parallel.ForEach(randomIntegers, i => list.BinarySearch(i));
                else
                    foreach (var i in randomIntegers)
                    {
                        list.BinarySearch(i);
                    }
            }
        }

        class SortedListTest : ITest
        {
            private SortedList<int, int> list;
            public void Initialize(int[] randomIntegers)
            {
                list = new SortedList<int, int>(randomIntegers.ToDictionary(i => i));
            }

            public void Search(int[] randomIntegers, bool multiThreaded)
            {
                if (multiThreaded)
                    Parallel.ForEach(randomIntegers, i => list.ContainsKey(i));
                else
                    foreach (var i in randomIntegers)
                    {
                        list.ContainsKey(i);
                    }
            }
        }

        class DictionaryTest : ITest
        {
            private Dictionary<int, int> dictionary;
            public void Initialize(int[] randomIntegers)
            {
                dictionary = randomIntegers.ToDictionary(i => i);
            }

            public void Search(int[] randomIntegers, bool multiThreaded)
            {
                if (multiThreaded)
                    Parallel.ForEach(randomIntegers, i => dictionary.ContainsKey(i));
                else
                    foreach (var i in randomIntegers)
                    {
                        dictionary.ContainsKey(i);
                    }
            }
        }

        class HashTableTest : ITest
        {
            private Hashtable hashTable;
            public void Initialize(int[] randomIntegers)
            {
                hashTable = new Hashtable(randomIntegers.ToDictionary(i => i));
            }

            public void Search(int[] randomIntegers, bool multiThreaded)
            {
                if (multiThreaded)
                    Parallel.ForEach(randomIntegers, i => hashTable.ContainsKey(i));
                else
                    foreach (var i in randomIntegers)
                    {
                        hashTable.ContainsKey(i);
                    }
            }
        }
    }
}

How to reduce complexity and know everything.

Quality We have a great QA. His code is like a Rolex. You look at his gherkin, and smile.  It says what it's testing, in plain English, ...