Compare commits

..

No commits in common. "main" and "0.0.1" have entirely different histories.
main ... 0.0.1

4 changed files with 68 additions and 109 deletions

1
.gitignore vendored
View File

@ -1,3 +1,2 @@
**/package-lock.json **/package-lock.json
**/node_modules **/node_modules
dist/*

View File

@ -1,8 +1,8 @@
{ {
"name": "wfa-js", "name": "wfa-js",
"version": "1.0.0", "version": "0.0.1",
"description": "Wavefront alignment algorithm in JS", "description": "Wavefront alignment algorithm in JS",
"main": "tests/test.js", "main": "src/main.js",
"type": "module", "type": "module",
"devDependencies": { "devDependencies": {
"eslint": "^8.43.0", "eslint": "^8.43.0",
@ -13,8 +13,6 @@
"progress": "^2.0.3" "progress": "^2.0.3"
}, },
"scripts": { "scripts": {
"test": "node tests/test.js", "lint": "DEBUG=eslint:cli-engine eslint --fix ."
"lint": "DEBUG=eslint:cli-engine eslint --fix src/*.js tests/*.js",
"minify": "sed -ze 's/\\t//g; s/\\/\\/[[:print:]]*//g;s/\\n//g;' src/wfa.js > dist/wfa.js"
} }
} }

View File

@ -1,4 +1,4 @@
import wfAlign from "../src/wfa.js"; import wf_align from "./wfa.js";
import fs from "fs"; import fs from "fs";
import ProgressBar from "progress"; import ProgressBar from "progress";
@ -7,7 +7,6 @@ data = JSON.parse(data);
const sequences = fs.readFileSync("./tests/sequences").toString().split("\n"); const sequences = fs.readFileSync("./tests/sequences").toString().split("\n");
// const total = sequences.length; // const total = sequences.length;
const total = 500; // skip the later tests because of memory usage const total = 500; // skip the later tests because of memory usage
const timePerChar = [];
for (const test_name of Object.keys(data)) { for (const test_name of Object.keys(data)) {
const test = data[test_name]; const test = data[test_name];
@ -20,10 +19,7 @@ for (const test_name of Object.keys(data)) {
for (let i = 0; i < total; i += 2) { for (let i = 0; i < total; i += 2) {
const s1 = sequences[i].replace(">"); const s1 = sequences[i].replace(">");
const s2 = sequences[i + 1].replace("<"); const s2 = sequences[i + 1].replace("<");
const start = process.hrtime()[1]; const { CIGAR, score } = wf_align(s1, s2, penalties);
const { score } = wfAlign(s1, s2, penalties, false);
const elapsed = process.hrtime()[1] - start;
timePerChar.push((elapsed / 1e9) / (s1.length + s2.length));
const solution_score = Number(solutions[j].split("\t")[0]); const solution_score = Number(solutions[j].split("\t")[0]);
if (solution_score === -score) { if (solution_score === -score) {
correct += 1; correct += 1;
@ -32,10 +28,4 @@ for (const test_name of Object.keys(data)) {
bar.tick(); bar.tick();
} }
console.log(`correct: ${correct}\ntotal: ${total / 2}\n`); console.log(`correct: ${correct}\ntotal: ${total / 2}\n`);
console.log(`average time per character (ms): ${average(timePerChar) * 1000}`);
}
function average (arr) {
const sum = arr.reduce((a, b) => a + b, 0);
return sum / arr.length;
} }

View File

@ -7,7 +7,7 @@ class WavefrontComponent {
} }
// get value for wavefront=score, diag=k // get value for wavefront=score, diag=k
getVal (score, k) { get_val (score, k) {
if (this.W[score] !== undefined && this.W[score][k] !== undefined) { if (this.W[score] !== undefined && this.W[score][k] !== undefined) {
return this.W[score][k]; return this.W[score][k];
} }
@ -17,7 +17,7 @@ class WavefrontComponent {
} }
// set value for wavefront=score, diag=k // set value for wavefront=score, diag=k
setVal (score, k, val) { set_val (score, k, val) {
if (this.W[score]) { if (this.W[score]) {
this.W[score][k] = val; this.W[score][k] = val;
} }
@ -28,7 +28,7 @@ class WavefrontComponent {
} }
// get alignment traceback // get alignment traceback
getTraceback (score, k) { get_traceback (score, k) {
if (this.A[score] !== undefined && this.A[score][k] !== undefined) { if (this.A[score] !== undefined && this.A[score][k] !== undefined) {
return this.A[score][k]; return this.A[score][k];
} }
@ -38,7 +38,7 @@ class WavefrontComponent {
} }
// set alignment traceback // set alignment traceback
setTraceback (score, k, traceback) { set_traceback (score, k, traceback) {
if (this.A[score]) { if (this.A[score]) {
this.A[score][k] = traceback; this.A[score][k] = traceback;
} }
@ -49,24 +49,24 @@ class WavefrontComponent {
} }
// get hi for wavefront=score // get hi for wavefront=score
getHi (score) { get_hi (score) {
const hi = this.hi[score]; const hi = this.hi[score];
return isNaN(hi) ? 0 : hi; return isNaN(hi) ? 0 : hi;
} }
// set hi for wavefront=score // set hi for wavefront=score
setHi (score, hi) { set_hi (score, hi) {
this.hi[score] = hi; this.hi[score] = hi;
} }
// get lo for wavefront=score // get lo for wavefront=score
getLo (score) { get_lo (score) {
const lo = this.lo[score]; const lo = this.lo[score];
return isNaN(lo) ? 0 : lo; return isNaN(lo) ? 0 : lo;
} }
// set lo for wavefront=score // set lo for wavefront=score
setLo (score, lo) { set_lo (score, lo) {
this.lo[score] = lo; this.lo[score] = lo;
} }
@ -184,45 +184,41 @@ function argmax (args) {
return args.indexOf(val); return args.indexOf(val);
} }
export default function wfAlign (s1, s2, penalties, doCIGAR = false) { export default function wf_align (s1, s2, penalties) {
const n = s1.length; const n = s1.length;
const m = s2.length; const m = s2.length;
const A_k = m - n; const A_k = m - n;
const A_offset = m; const A_offset = m;
let score = 0; let score = 0;
const M = new WavefrontComponent(); const M = new WavefrontComponent();
M.setVal(0, 0, 0); M.set_val(0, 0, 0);
M.setHi(0, 0); M.set_hi(0, 0);
M.setLo(0, 0); M.set_lo(0, 0);
M.setTraceback(0, 0, traceback.End); M.set_traceback(0, 0, traceback.End);
const I = new WavefrontComponent(); const I = new WavefrontComponent();
const D = new WavefrontComponent(); const D = new WavefrontComponent();
while (true) { while (true) {
wfExtend(M, s1, n, s2, m, score); wf_extend(M, s1, n, s2, m, score);
if (M.getVal(score, A_k) >= A_offset) { if (M.get_val(score, A_k) >= A_offset) {
break; break;
} }
score++; score++;
wfNext(M, I, D, score, penalties); wf_next(M, I, D, score, penalties);
} }
let CIGAR = null; return wf_backtrace(M, I, D, score, penalties, A_k, A_offset);
if (doCIGAR) {
CIGAR = wfBacktrace(M, I, D, score, penalties, A_k, s1, s2);
}
return { score, CIGAR };
} }
function wfExtend (M, s1, n, s2, m, score) { function wf_extend (M, s1, n, s2, m, score) {
const lo = M.getLo(score); const lo = M.get_lo(score);
const hi = M.getHi(score); const hi = M.get_hi(score);
for (let k = lo; k <= hi; k++) { for (let k = lo; k <= hi; k++) {
let v = M.getVal(score, k) - k; let v = M.get_val(score, k) - k;
let h = M.getVal(score, k); let h = M.get_val(score, k);
if (isNaN(v) || isNaN(h)) { if (isNaN(v) || isNaN(h)) {
continue; continue;
} }
while (s1[v] === s2[h]) { while (s1[v] === s2[h]) {
M.setVal(score, k, M.getVal(score, k) + 1); M.set_val(score, k, M.get_val(score, k) + 1);
v++; v++;
h++; h++;
if (v > n || h > m) { if (v > n || h > m) {
@ -232,49 +228,49 @@ function wfExtend (M, s1, n, s2, m, score) {
} }
} }
function wfNext (M, I, D, score, penalties, do_traceback) { function wf_next (M, I, D, score, penalties) {
const x = penalties.x; const x = penalties.x;
const o = penalties.o; const o = penalties.o;
const e = penalties.e; const e = penalties.e;
const lo = min([M.getLo(score - x), M.getLo(score - o - e), I.getLo(score - e), D.getLo(score - e)]) - 1; const lo = min([M.get_lo(score - x), M.get_lo(score - o - e), I.get_lo(score - e), D.get_lo(score - e)]) - 1;
const hi = max([M.getHi(score - x), M.getHi(score - o - e), I.getHi(score - e), D.getHi(score - e)]) + 1; const hi = max([M.get_hi(score - x), M.get_hi(score - o - e), I.get_hi(score - e), D.get_hi(score - e)]) + 1;
M.setHi(score, hi); M.set_hi(score, hi);
I.setHi(score, hi); I.set_hi(score, hi);
D.setHi(score, hi); D.set_hi(score, hi);
M.setLo(score, lo); M.set_lo(score, lo);
I.setLo(score, lo); I.set_lo(score, lo);
D.setLo(score, lo); D.set_lo(score, lo);
for (let k = lo; k <= hi; k++) { for (let k = lo; k <= hi; k++) {
I.setVal(score, k, max([ I.set_val(score, k, max([
M.getVal(score - o - e, k - 1), M.get_val(score - o - e, k - 1),
I.getVal(score - e, k - 1) I.get_val(score - e, k - 1)
]) + 1); ]) + 1);
I.setTraceback(score, k, [traceback.OpenIns, traceback.ExtdIns][argmax([ I.set_traceback(score, k, [traceback.OpenIns, traceback.ExtdIns][argmax([
M.getVal(score - o - e, k - 1), M.get_val(score - o - e, k - 1),
I.getVal(score - e, k - 1) I.get_val(score - e, k - 1)
])]); ])]);
D.setVal(score, k, max([ D.set_val(score, k, max([
M.getVal(score - o - e, k + 1), M.get_val(score - o - e, k + 1),
D.getVal(score - e, k + 1) D.get_val(score - e, k + 1)
])); ]));
D.setTraceback(score, k, [traceback.OpenDel, traceback.ExtdDel][argmax([ D.set_traceback(score, k, [traceback.OpenDel, traceback.ExtdDel][argmax([
M.getVal(score - o - e, k + 1), M.get_val(score - o - e, k + 1),
D.getVal(score - e, k + 1) D.get_val(score - e, k + 1)
])]); ])]);
M.setVal(score, k, max([ M.set_val(score, k, max([
M.getVal(score - x, k) + 1, M.get_val(score - x, k) + 1,
I.getVal(score, k), I.get_val(score, k),
D.getVal(score, k) D.get_val(score, k)
])); ]));
M.setTraceback(score, k, [traceback.Sub, traceback.Ins, traceback.Del][argmax([ M.set_traceback(score, k, [traceback.Sub, traceback.Ins, traceback.Del][argmax([
M.getVal(score - x, k) + 1, M.get_val(score - x, k) + 1,
I.getVal(score, k), I.get_val(score, k),
D.getVal(score, k) D.get_val(score, k)
])]); ])]);
} }
} }
function wfBacktrace (M, I, D, score, penalties, A_k, s1, s2) { function wf_backtrace (M, I, D, score, penalties, A_k) {
const traceback_CIGAR = ["I", "I", "D", "D", "X", "", "", ""]; const traceback_CIGAR = ["I", "I", "D", "D", "X", "", "", ""];
const x = penalties.x; const x = penalties.x;
const o = penalties.o; const o = penalties.o;
@ -282,7 +278,7 @@ function wfBacktrace (M, I, D, score, penalties, A_k, s1, s2) {
let CIGAR_rev = ""; // reversed CIGAR let CIGAR_rev = ""; // reversed CIGAR
let tb_s = score; // traceback score let tb_s = score; // traceback score
let tb_k = A_k; // traceback diag k let tb_k = A_k; // traceback diag k
let current_traceback = M.getTraceback(tb_s, tb_k); let current_traceback = M.get_traceback(tb_s, tb_k);
let done = false; let done = false;
while (!done) { while (!done) {
CIGAR_rev += traceback_CIGAR[current_traceback]; CIGAR_rev += traceback_CIGAR[current_traceback];
@ -290,67 +286,43 @@ function wfBacktrace (M, I, D, score, penalties, A_k, s1, s2) {
case traceback.OpenIns: case traceback.OpenIns:
tb_s = tb_s - o - e; tb_s = tb_s - o - e;
tb_k = tb_k - 1; tb_k = tb_k - 1;
current_traceback = M.getTraceback(tb_s, tb_k); current_traceback = M.get_traceback(tb_s, tb_k);
break; break;
case traceback.ExtdIns: case traceback.ExtdIns:
tb_s = tb_s - e; tb_s = tb_s - e;
tb_k = tb_k - 1; tb_k = tb_k - 1;
current_traceback = I.getTraceback(tb_s, tb_k); current_traceback = I.get_traceback(tb_s, tb_k);
break; break;
case traceback.OpenDel: case traceback.OpenDel:
tb_s = tb_s - o - e; tb_s = tb_s - o - e;
tb_k = tb_k + 1; tb_k = tb_k + 1;
current_traceback = M.getTraceback(tb_s, tb_k); current_traceback = M.get_traceback(tb_s, tb_k);
break; break;
case traceback.ExtdDel: case traceback.ExtdDel:
tb_s = tb_s - e; tb_s = tb_s - e;
tb_k = tb_k + 1; tb_k = tb_k + 1;
current_traceback = D.getTraceback(tb_s, tb_k); current_traceback = D.get_traceback(tb_s, tb_k);
break; break;
case traceback.Sub: case traceback.Sub:
tb_s = tb_s - x; tb_s = tb_s - x;
// tb_k = tb_k; // tb_k = tb_k;
current_traceback = M.getTraceback(tb_s, tb_k); current_traceback = M.get_traceback(tb_s, tb_k);
break; break;
case traceback.Ins: case traceback.Ins:
// tb_s = tb_s; // tb_s = tb_s;
// tb_k = tb_k; // tb_k = tb_k;
current_traceback = I.getTraceback(tb_s, tb_k); current_traceback = I.get_traceback(tb_s, tb_k);
break; break;
case traceback.Del: case traceback.Del:
// tb_s = tb_s; // tb_s = tb_s;
// tb_k = tb_k; // tb_k = tb_k;
current_traceback = D.getTraceback(tb_s, tb_k); current_traceback = D.get_traceback(tb_s, tb_k);
break; break;
case traceback.End: case traceback.End:
done = true; done = true;
break; break;
} }
} }
const CIGAR_part = Array.from(CIGAR_rev).reverse(); // still missing Match positions const CIGAR = Array.from(CIGAR_rev).reverse().join("");
let c = 0; return { CIGAR, score };
let i = 0;
let j = 0;
while (i < s1.length && j < s2.length) { // iterate through the strings to back-solve match positions
if (s1[i] === s2[j]) { // match, insert M and then increment c, i, j
CIGAR_part.splice(c, 0, "M");
c++;
i++;
j++;
}
else if (CIGAR_part[c] === "X") { // mismatch, increment c, i, j
c++;
i++;
j++;
}
else if (CIGAR_part[c] === "I") { // insertion of character to s1 to reach s2, increment c,j
c++;
j++;
}
else if (CIGAR_part[c] === "D") { // deletion of character from s1 to reach s2, increment c,i
c++;
i++;
}
}
return CIGAR_part.join("");
} }