Project Euler #13: Large Sum

Posted on

Problem

Description:

Work out the first ten digits of the sum of the following one-hundred 50-digit numbers.

Of course I didn’t include the numbers in the description…

I am sure there is a better way to do this. I just did a simple solution, where I add all the numbers and take the first 10 digits.

import java.math.BigInteger;

public class ProjectEuler13 {

    public static void main(String[] args) {
        String[] numbers = { "37107287533902102798797998220837590246510135740250",
                "46376937677490009712648124896970078050417018260538",
                "74324986199524741059474233309513058123726617309629",
                "91942213363574161572522430563301811072406154908250",
                "23067588207539346171171980310421047513778063246676",
                "89261670696623633820136378418383684178734361726757",
                "28112879812849979408065481931592621691275889832738",
                "44274228917432520321923589422876796487670272189318",
                "47451445736001306439091167216856844588711603153276",
                "70386486105843025439939619828917593665686757934951",
                "62176457141856560629502157223196586755079324193331",
                "64906352462741904929101432445813822663347944758178",
                "92575867718337217661963751590579239728245598838407",
                "58203565325359399008402633568948830189458628227828",
                "80181199384826282014278194139940567587151170094390",
                "35398664372827112653829987240784473053190104293586",
                "86515506006295864861532075273371959191420517255829",
                "71693888707715466499115593487603532921714970056938",
                "54370070576826684624621495650076471787294438377604",
                "53282654108756828443191190634694037855217779295145",
                "36123272525000296071075082563815656710885258350721",
                "45876576172410976447339110607218265236877223636045",
                "17423706905851860660448207621209813287860733969412",
                "81142660418086830619328460811191061556940512689692",
                "51934325451728388641918047049293215058642563049483",
                "62467221648435076201727918039944693004732956340691",
                "15732444386908125794514089057706229429197107928209",
                "55037687525678773091862540744969844508330393682126",
                "18336384825330154686196124348767681297534375946515",
                "80386287592878490201521685554828717201219257766954",
                "78182833757993103614740356856449095527097864797581",
                "16726320100436897842553539920931837441497806860984",
                "48403098129077791799088218795327364475675590848030",
                "87086987551392711854517078544161852424320693150332",
                "59959406895756536782107074926966537676326235447210",
                "69793950679652694742597709739166693763042633987085",
                "41052684708299085211399427365734116182760315001271",
                "65378607361501080857009149939512557028198746004375",
                "35829035317434717326932123578154982629742552737307",
                "94953759765105305946966067683156574377167401875275",
                "88902802571733229619176668713819931811048770190271",
                "25267680276078003013678680992525463401061632866526",
                "36270218540497705585629946580636237993140746255962",
                "24074486908231174977792365466257246923322810917141",
                "91430288197103288597806669760892938638285025333403",
                "34413065578016127815921815005561868836468420090470",
                "23053081172816430487623791969842487255036638784583",
                "11487696932154902810424020138335124462181441773470",
                "63783299490636259666498587618221225225512486764533",
                "67720186971698544312419572409913959008952310058822",
                "95548255300263520781532296796249481641953868218774",
                "76085327132285723110424803456124867697064507995236",
                "37774242535411291684276865538926205024910326572967",
                "23701913275725675285653248258265463092207058596522",
                "29798860272258331913126375147341994889534765745501",
                "18495701454879288984856827726077713721403798879715",
                "38298203783031473527721580348144513491373226651381",
                "34829543829199918180278916522431027392251122869539",
                "40957953066405232632538044100059654939159879593635",
                "29746152185502371307642255121183693803580388584903",
                "41698116222072977186158236678424689157993532961922",
                "62467957194401269043877107275048102390895523597457",
                "23189706772547915061505504953922979530901129967519",
                "86188088225875314529584099251203829009407770775672",
                "11306739708304724483816533873502340845647058077308",
                "82959174767140363198008187129011875491310547126581",
                "97623331044818386269515456334926366572897563400500",
                "42846280183517070527831839425882145521227251250327",
                "55121603546981200581762165212827652751691296897789",
                "32238195734329339946437501907836945765883352399886",
                "75506164965184775180738168837861091527357929701337",
                "62177842752192623401942399639168044983993173312731",
                "32924185707147349566916674687634660915035914677504",
                "99518671430235219628894890102423325116913619626622",
                "73267460800591547471830798392868535206946944540724",
                "76841822524674417161514036427982273348055556214818",
                "97142617910342598647204516893989422179826088076852",
                "87783646182799346313767754307809363333018982642090",
                "10848802521674670883215120185883543223812876952786",
                "71329612474782464538636993009049310363619763878039",
                "62184073572399794223406235393808339651327408011116",
                "66627891981488087797941876876144230030984490851411",
                "60661826293682836764744779239180335110989069790714",
                "85786944089552990653640447425576083659976645795096",
                "66024396409905389607120198219976047599490197230297",
                "64913982680032973156037120041377903785566085089252",
                "16730939319872750275468906903707539413042652315011",
                "94809377245048795150954100921645863754710598436791",
                "78639167021187492431995700641917969777599028300699",
                "15368713711936614952811305876380278410754449733078",
                "40789923115535562561142322423255033685442488917353",
                "44889911501440648020369068063960672322193204149535",
                "41503128880339536053299340368006977710650566631954",
                "81234880673210146739058568557934581403627822703280",
                "82616570773948327592232845941706525094512325230608",
                "22918802058777319719839450180888072429661980811197",
                "77158542502016545090413245809786882778948721859617",
                "72107838435069186155435662884062257473692284509516",
                "20849603980134001723930671666823555245252804609722",
                "53503534226472524250874054075591789781264330331690" };
        long time = System.nanoTime();
        String result = sum(numbers).toString().substring(0, 10);
        time = System.nanoTime() - time;
        System.out.println("Result: " + result + "nTime in nanoseconds: " + time);
    }

    public static BigInteger sum(String[] values) {
        if (values.length == 0) {
            return BigInteger.ZERO;
        }
        BigInteger result = new BigInteger(values[0]);
        for (int i = 1; i < values.length; i++) {
            result = result.add(new BigInteger(values[i]));
        }
        return result;
    }

}

Note that the values above are Strings, because it was easier to type that than:

BigInteger[] values = {new BigInteger("...")}; // and so on

Output:

Result: 5537376230
Time in nanoseconds: 57357242

Concerns:

  1. It’s really just performance. There’s gotta be a better way to do this, but I can’t think of a single way.

Solution

Update

See the comments for corner cases.

Original

When I solved this problem, I readily noticed what the problem expected the programmers to understand. Let me put it this way…

An analogy

Have you ever seen a car’s odometer? If you have, you will notice that the digits in the far right increase fastest while you drive. The next digit, i.e. towards the left updates less frequently (ten times slower). The next is even slower and the left-most digit is the slowest; it doesn’t update even if you drive for hours at a stretch.

Relation

What is its relation to the problem at hand? We must understand the significance of the least significant digits is to provide the “carry” to be added to the next significant digit. Let’s take our analogy and proceed with it. The rightmost digits act as the miles, tens-of-miles, hundreds-of-miles counters, that update frequently. When they cross 9

9

and return to 0

0

, they increment the immediate left digit. It is essentially updating using the carry. These carries too become lesser and lesser in frequency as we move leftwards.

For example, take 10,950

10,950

and 15,456

15,456

. Add them up and you get 26,406

26,406

. We want the first two digits of their sum. If we take 10,000

10,000

and 15,000

15,000

, we get a slightly different answer: 25,000

25,000

.

If we analyse the first 2 (i.e. leftmost) digits of the answers, we see that they 25

25

and 26

26

differ in their one’s place. Their first digits match up. That’s why I said “slightly”.

In order to improve the precision, take an extra (buffer) digit. Using three digits: 109+154=263

109+154=263

. There you go! We now have the answers matching in their first two digits. This theory can be generalized to obtain the first n

n

digits of the sum of a collection of numbers (of length more than n

n

digits). The number of buffer digits depends on the “number of numbers” A rough relation would be

No. of buffer digits=log(no. of items)

No. of buffer digits=log(no. of items)

Trial and error supports this. I urge you to prove it yourself.

The code

Finally, let’s get cracking

public class ProjectEuler13 {

    public static void main(String[] args) {
        String[] numbers = {
            // The big long list
            };
        long time = System.nanoTime();
        String result = sum(numbers, 10).substring(0, 10);
        time = System.nanoTime() - time;
        System.out.println("Result: " + result + "nTime in nanoseconds: " + time);
    }

    public static String sum(String[] values, int numberOfDigits) {
        // best to take an overestimate
        int buffer = (int) Math.ceil(Math.log10(values.length)); // = 2 for 50
        int length = numberOfDigits + buffer;
        long result = 0;
        for (String value : values) {
            result += Long.parseLong(value.substring(0, length));
        }
        return String.valueOf(result);
    }

}

Even on my slow computer:

Result: 5537376230
Time in nanoseconds: 2493350

Yes, there is a (potentially) better way:

Take advantage of the fact that you start with input in the decimal system.

  1. Allocate an array which can assuredly store all digits of the result.
  2. Add the n-th digits (starting at the front) and add the subtotal into the result.
  3. Repeat step 2 until even the maximal subtotal cannot influence the first ten digits of the result any longer.
  4. Profit.

As an aside, I don’t think the task was intended to be solved with a bignum library at all, but that works too.

Here is a refinement of ambigram_maker’s solution for dealing with potential carries bubbling up from the far end, which essentially boils down to merging it with Deduplicator’s idea.

The crux of the matter is detecting and/or handling situations that are equivalent to the odometer showing all nines, so that the next tick must cause all digits to change.

But let’s take things step by step. If we are summing n values by processing them column by column then the carry from one column to the next can be n - 1 in the worst case. Hence the accumulator variables used must be able to store values that are up to n - 1 bigger than the maximum value of a column cell, in order to avoid overflow.

This means that the number of k-ary digits required for a column accumulator is the width of one column plus ceil(log(n - 1 + 1) / log(k)) extra digits. In the decimal realm that’s:

R = ceil(log10(n)) = (int)log10(n - 1) + 1  // n >= 2

A long can hold up to floor(63 * log10(2)) == 18 decimal digits, which means that we can make a source column up to 18 - R digits wide without risking overflow.

Also, if adding the worst-case carry of n - 1 to the lowest R digits of a column sum does not cause a carry then nothing that happens in the as-yet unprocessed columns could possibly affect any higher digits of the result. In other words, the effect of the unknown carry out of the unprocessed part must be confined to the R lowest digits of the current result.

In code the test for a sufficient level of overflow isolation looks like this:

long m = (long)pow(10, R);

if (sum % m < m - (n - 1))
    // a carry <= (n - 1) from below cannot cause carry out of the low R digits

There is an additional consideration, though: if R is computed as shown earlier then the overflow reserve can become very small, even zero. The latter will happen if the number of input values is a power of 10, so that a carry of up to n – 1 = 10^R – 1 can only be accomodated if sum % 10^R == 0, i.e. once every 10^R random inputs. Hence it is advisable to increase R by 1 or 2, in order to reduce the frequency of extended computations by a factor of 10 or 100 (roughly speaking).

This logic also allows it to give an alternative explanation for ambigram_maker’s solution. If D result digits are desired and we compute a column sum of the first D + R digits then the effect of the remaining, unprocessed digits must be confined to the low R digits of the result if above condition holds; in that case all but the lowest R digits of the result must be correct. If at least one of the input numbers has a non-zero first digit then the sum cannot be shorter than the inputs, and taking the first D gives the desired result. For this computation the accumulator needs to be big enough to hold R + D + R digits (one R for accrued carries, and one R for absorbing carries from below).

The apparent discrepancy between the actual performance of ambigram_maker’s solution with R == 2 (only one problem every 200 random inputs) vs. the pessimistic outlook of the test criterion (for R == 2 and N == 100, only every 100th input does not require extended computation) comes from the fact that the test must necessarily assume the worst about the unknown, unprocessed portions of the input. Hence the suggestion to increase R appropriately.

Finally, here’s the code. I don’t have a Java environment handy, so I had to write it in a hashish language (my LINQPad is always at most one key tap away):

const int FULL_DIGITS_PER_LONG = 18;

static string sum_prefix (string[] values, int result_digits = 10)
{
    Trace.Assert(values.Length > 0);

    int n = values.Length;
    int r = (int)Math.Ceiling(Math.Log10(n)) + 2;   // # of overflow reserve digits
    int u = FULL_DIGITS_PER_LONG - r;           // # of remaining/usable digits per long
    int m = (int)Math.Pow(10, r);               // modulus for the overflow reserve digits

    Trace.Assert(u >= result_digits + r);   // result not guaranteed to fit in a single limb otherwise
    Trace.Assert(m >= n);                   // ward off oopses

    int l = values[0].Length;

    Trace.Assert(l >= result_digits + r);
    Trace.Assert(values.Where((si) => si.Length != l).Count() == 0);
    Trace.Assert(values.Where((sj) => sj[0] != '0').Count() > 0);

    var sum_limbs = new List<long>();
    int digits = result_digits + r;         // in the first round, use exactly what's needed
    long sum;                               // final values of sum and digits needed after the loop

    for (int offset = 0; ; offset += digits, digits = u)
    {
        digits = Math.Min(digits, l - offset);  // # of source digits in this round

        sum = 0;
        foreach (string s in values)
            sum += long.Parse(s.Substring(offset, digits));

        if (sum % m < m - (n - 1) || offset >= l - digits) // worst-case carry can be absorbed
            break;

        sum_limbs.Add(sum); 
    }

    for (int i = sum_limbs.Count; --i >= 0; digits = u)
    {
        long carry = sum / (long)Math.Pow(10, digits);
        sum = sum_limbs[i] + carry;
    }

    return sum.ToString().Substring(0, result_digits);
}

UPDATE: with optimisations like this, people will naturally ask what the bottom lines is. Hence I pitted the above function against the following code based on the builtin BigInteger type, after surreptitiously tweaking the carry reserve from R+1 to R+2:

static string via_BigInteger (string[] values, int digits = 10)
{
    var sum = new System.Numerics.BigInteger(0);

    foreach (var s in values)
        sum += System.Numerics.BigInteger.Parse(s);

    return sum.ToString().Substring(0, digits);
}

Result: 0.943 ms for the BigInteger code, 0.018 ms for the smart prefix calculation (per call, averaged, random inputs). That’s a speedup factor of 50 – well worth the effort.

The number of extended computation rounds corresponded well to the value expected for the Euler task parameters and R+2, which is around 1%. For R+1 the value was an expected 10%, and the timing was 0.019 ms.

Leave a Reply

Your email address will not be published. Required fields are marked *