Problem
I made a function for recursively reading a directory and it just seems kind of long and complicated. If you could take a look and tell me what’s good or bad about it, that would be great.
Reading the directory synchronously is not an option.
var dutils = require('./dutils');
module.exports = {
getIndex: function(ignore, cb) {
var root = this.root;
var cbs = 1;
var index = {};
// recursive function for walking directory
(function walk(dir) {
fs.readdir(dir, function(err, files) {
if (err) return cb(err);
files.forEach(function(fileName) {
// prepare
var filePath = path.join(dir, fileName); // filePath with root
var relFilePath = path.relative(root, filePath); // filePath without root
// if the file is on our ignore list don't index it
if (dutils.globmatch(filePath, ignore)) return;
var stats = fs.statSync(filePath);
var isDir = stats.isDirectory();
index[relFilePath] = {
isDir: isDir,
mtime: stats.mtime
};
// if the file is a directory, walk it next
if (isDir) {
// count the number of callbacks that are running concurrently
cbs++;
walk(filePath);
}
});
// our callback was completed
cbs--;
// if all callbacks were completed, we've completely walked our directory
if (cbs === 0) cb(null, index);
});
})(root);
}
}
Solution
My comments are inline starting with the //*
. I also very much encourage you to look at async libraries such as Q.
var dutils = require('./dutils');
//* Why export an object with a single function? Why not just export the function itself?
module.exports = {
//* I don't like intermixing method implementation with exports declaration because you tend to be
//* looking for very different things when looking at each. Hoisting to the rescure!
//* module.exports = { getIndex: getIndex };
//* ... later ...
//* function getIndex(ignore, cb) {
//* ...
getIndex: function(ignore, cb) {
//* What if they didn't pass a callback? Might as well check here and throw an error. Otherwise
//* there will be a confusing error later on
//* What is `this`? I **highly** recommend not using the `this` parameter unless you
//* understand _very_ well what it does and how it works. You can almost always achieve the same
//* thing using simpler techniques
var root = this.root;
//* cds is not a good variable name - I have no idea what this represents reading it
var cbs = 1;
var index = {};
// recursive function for walking directory
(function walk(dir) {
//* I usually prefer naming callback functions. Not only does it allow you to give a good
//* label to the code but the function will show up labeled in stack traces
fs.readdir(dir, function(err, files) {
//* Be aware that it is possible for the callback to be triggered multiple times when there are
//* errors. This is an unusual interface and just be certain that you do this on purpose
if (err) return cb(err);
//* Again, I would recommend labeling this function.
//* Also I'm not sure if this will follow symlinks or not but if does then this might end up in
//* an endless loop.
files.forEach(function(fileName) {
// prepare
var filePath = path.join(dir, fileName); // filePath with root
var relFilePath = path.relative(root, filePath); // filePath without root
//* I don't think this is really a useful comment as it doesn't say much more than
//* the code itself does. I also usually prefer putting the `return` statement for early returns
//* on its own line to make the early return stand out visually.
// if the file is on our ignore list don't index it
if (dutils.globmatch(filePath, ignore)) return;
var stats = fs.statSync(filePath);
var isDir = stats.isDirectory();
index[relFilePath] = {
isDir: isDir,
mtime: stats.mtime
};
//* This comment is not very useful as it - again - just restates what the code does
// if the file is a directory, walk it next
if (isDir) {
// count the number of callbacks that are running concurrently
cbs++;
walk(filePath);
}
});
// our callback was completed
cbs--;
// if all callbacks were completed, we've completely walked our directory
if (cbs === 0) cb(null, index);
//* Is the entire point of the `cbs` counter to see if any `readdir` operation resulted in an error?
//* that seems to be all that it does. Why not just have a `var lastError = null` in `getIndex` and
//* rather than checking for err do `if(lastError = err) return`. That way if `err` is truthy you have
//* an error, otherwise you do not
//*
//* Also keep in mind that cb will be called multiple times but each time with the
//* same instance of the index object. That means that previously invoked callbacks will be
//* having this object mutate underneat them. This might be exactly what you want but it is unuaual
//* and should definitely be documented clearly. At the very least you want a big bold doc comment
//* on this function
});
})(root);
}
}
It’s more efficient, better to maintain and the code becomes much cleaner if you use the promisify native Node function to transform the fs callback functions to promise based functions, then write your code with async/await style.
Code:
const fs = require('fs');
const { promisify } = require('util');
const path = require('path');
const readDirAsync = promisify(fs.readdir);
//
const sourcePath = process.argv[2] || './';
//
(async () => {
// Main execution
var finalRes = await scanDir(sourcePath);
console.log('Scan result', finalRes);
process.exit(0);
// Recursive async function
async function scanDir(dir) {
const list = await readDirAsync(dir);
// Array.map returns promisses because the argument function is async
return Promise.all(list.map(async (name) => {
try {
// File (or directory) info
const itemPath = path.join(dir, name);
const stats = fs.statSync(itemPath);
const isDir = stats.isDirectory();
var children = null;
// Recursion for directories
if (isDir) {
children = await scanDir(itemPath);
}
//
return {
name,
itemPath,
isDir,
parent: dir,
children
};
}
catch (err) {
console.error('Error in scanDir()', err);
}
}));
}
})();