Problem
This code hasn’t been tested exhaustively – I would love suggestions about a testing library.
Critical, optional and positive feedback are all more than welcome.
const fs = require('fs');
/**
 * Strip comments from a source file and write the remaining code to a new
 * file next to it. The output name is the input name with `_CLEAN` inserted
 * before the first dot of the base name (e.g. `src/test.js` ->
 * `src/test_CLEAN.js`); a name without a dot simply gets `_CLEAN` appended.
 *
 * The cleaner is deliberately line-based. It removes:
 *   - lines that hold only a `//` comment (optionally indented),
 *   - block comments that open at the start of a line and close at the end
 *     of a line (on the same line or a later one),
 *   - a trailing `// ...` comment that follows a `;`.
 * Empty lines are dropped, and one blank line is emitted after every line
 * that is exactly `};`.
 *
 * Known limitations (this is not a parser): comment markers inside string,
 * template or regular-expression literals are treated as comments, a
 * trailing `//` comment that does not follow a `;` is kept, and block
 * comments that start or end in the middle of a code line are not handled.
 *
 * @param {string} file - Path of the file to clean.
 * @return {number} Always 0.
 * @throws {Error} If the input file cannot be read or the output file
 *     cannot be written (errors from the synchronous `fs` calls propagate).
 */
function clean(file) {
  // Only look for the dot inside the base name, so a dot in a directory
  // name (e.g. `./src/test.js`) does not end up receiving the suffix.
  const baseStart = Math.max(file.lastIndexOf('/'), file.lastIndexOf('\\')) + 1;
  const firstDot = file.indexOf('.', baseStart);
  const insertAt = firstDot === -1 ? file.length : firstDot;
  const outputFileName = `${file.slice(0, insertAt)}_CLEAN${file.slice(insertAt)}`;

  // A line that starts with `//`, possibly after indentation.
  const singleLineComment = /^\s*\/\//;
  // A line that starts with `/*`, possibly after indentation.
  const multiLineCommentStart = /^\s*\/\*/;
  // A line that ends with `*/` (trailing whitespace tolerated).
  const multiLineCommentEnd = /\*\/\s*$/;
  // Code followed by `;` and then a `//` comment.
  const singleLineCommentAfterCode = /;\s*\/\//;

  let insideBlockComment = false;
  let output = '';

  // Split on CRLF, CR and LF so Windows line endings do not leave a stray
  // `\r` that would defeat the `$` anchor and the `};` comparison below.
  const lines = fs.readFileSync(file, 'utf-8').split(/\r\n|\r|\n/);

  for (const rawLine of lines) {
    if (rawLine === '') {
      continue;
    }

    if (insideBlockComment) {
      if (multiLineCommentEnd.test(rawLine)) {
        insideBlockComment = false;
      }
      continue;
    }

    const opener = multiLineCommentStart.exec(rawLine);
    if (opener !== null) {
      // Look for the terminator only AFTER the opening `/*`: a one-line
      // `/* ... */` must not leave skip mode switched on, while a bare
      // `/*/` must not be mistaken for an already closed comment.
      const afterOpener = rawLine.slice(opener[0].length);
      insideBlockComment = !multiLineCommentEnd.test(afterOpener);
      continue;
    }

    if (singleLineComment.test(rawLine)) {
      continue;
    }

    // Cut at the matched `;` itself, so `a = 1;// x` is handled the same
    // way as `a = 1; // x`.
    const trailing = singleLineCommentAfterCode.exec(rawLine);
    const line = trailing === null ? rawLine : rawLine.slice(0, trailing.index + 1);

    output += `${line}\n`;
    // Visually separate blocks: add a blank line after a closing `};`.
    if (line === '};') {
      output += '\n';
    }
  }

  // Write once, synchronously: an asynchronous truncate could otherwise run
  // after some lines were already appended and wipe them out.
  fs.writeFileSync(outputFileName, output);
  return 0;
}
Solution
To process source code reliably, use a parser: build an AST with it (comments may already be removed at that stage) and then convert the AST back to code. An FSM (finite-state machine) implementation may also work, though I'm not sure. Either way, I believe your current RegExp approach is far from working.
Anyway,
1. Line break
JavaScript supports 5 different line terminator sequences. Your code only supports one or two of them.
2. Single line comment
The code before a single-line comment does not need to end with `;`.
var a = 42// comment
3. Multi line comment
The start and end of a multi-line comment do not need to be on lines of their own. They may appear on the same line as code.
var a = 42; /* comment */
var a = 42; /*
*/ var b = 42;
3. Some more complex comments
After you fix above issues, you are ready to handle more complex situations:
var a = 3
/*
*// 3;
/*/ /*/
/* // */
// /*
var b = 4;
// */
4. String and RegExp
Text inside strings and regular expressions is not a comment.
var a = '; // ';
var a = '\
// ';
var a = `
// `;
var a = /[; //]/;
This gets even more complex when you mix them together.
var c = `; // ${function () {
a = '}';
// Wow, SO failed to use correct syntax highlight for this too!
}}`
5. HTML like comments
This could be optional. Most browsers support HTML-like comments, but not every JavaScript engine is required to support them.
var a = 42;
--> this is comment
<!-- this is comment too
6. Shebang
It is up to you if you want to handle shebang as comments. They work on some browsers.
#!/usr/bin/node
var a = 42;