Counting hits by domain

Posted on

Problem

I wrote this algorithm which takes input data from something like a CSV file or a large array that contain a series of String elements in the format of “count, FQDN” and then adds or increments the count of each domain component up to the complete FQDN. For example:

// Sample output (in any order/format):
// getTotalsByDomain(counts)
//   1320    com
//   900    google.com
//   410    yahoo.com
//    60    mail.yahoo.com
//    10    mobile.sports.yahoo.com
//    50    sports.yahoo.com
//    10    stackoverflow.com
//     3  org
//     3  wikipedia.org
//     2  en.wikipedia.org
//     1  es.wikipedia.org
//     1  mobile.sports
//   1  sports

let counts = [ "900,google.com",
    "60,mail.yahoo.com", 
    "10,mobile.sports.yahoo.com", 
    "40,sports.yahoo.com", 
    "300,yahoo.com", 
    "10,stackoverflow.com", 
    "2,en.wikipedia.org", 
    "1,es.wikipedia.org", 
    "1,mobile.sports" ];

I was able to do this pretty well with the below algorithm, but I am concerned with the inner for loop which uses var ‘j’. I felt like the only way that I could incrementally parse the domain components from the already split array was to create another array which unshifted the domain components to partially create a new array until I completed all of the components of the given FQDN element.

function getDomainHits(arr){

  var splitCount = [];
  var domainCountDict = {};

  for (var i = 0; i < arr.length; i++){

    splitCount = arr[i].split(",");

    var curCnt = 0;
          if (splitCount[0]){

            curCnt = splitCount[0];

          }

    var domain = [];
    var currentDom = [];
          if (splitCount[1] != undefined && splitCount[1]){

            domain = splitCount[1].split(".");


            for (var j = domain.length - 1; j >= 0; j--){


               currentDom.unshift(domain.pop());

               /*console.log("current iter: " + k + "n"
                           + "currentDom: " + currentDom.join(".") + "n"
                           + "current count: " + curCnt + "n");*/

              if (currentDom.join(".") in domainCountDict){
                /*console.log("currentDom2: " + currentDom.join("."));
                console.log("increment existing");*/
                domainCountDict[currentDom.join(".")] += parseInt(curCnt);  


              }
              if (!(currentDom.join(".") in domainCountDict)){
                /*console.log("currentDom3: " + currentDom.join("."));
                console.log("increment new");*/
                domainCountDict[currentDom.join(".")] = parseInt(curCnt);


                //console.log(domainCountDict);
              }

            }

          }

  }

  return domainCountDict;

}

console.log(getDomainHits(counts));

If you want to see a complete walkthrough of my logic then you can see my answer to my question in Stack Overflow

Solution

You can modify the input until there’s nothing left of it. In this example, I’ve used a regex with .replace() to shorten the FQDN.

If you don’t like regex, you could instead split the domain name on ., then in the loop use dom.join(".") to compose the object key, followed by dom.shift() to shorten the array. With the array technique, the loop conditional is dom.length (instead of just dom).

function getDomainHits(arr){
    let total={};
    arr.forEach( row => {
        let [hits, dom] = row.split(",");
        hits=parseInt(hits);
        while ( hits && dom ) { 
            total[dom] = (total[dom] || 0) + hits;
            dom = dom.replace( /^[^.]*.?/, '' )
        }
    });
    return total;
}

Leave a Reply

Your email address will not be published. Required fields are marked *