Improve the CIGAR return value to include match positions.

Fix the test.
This commit is contained in:
Arthur Lu 2024-08-06 17:44:51 +00:00
parent 24bbe15a12
commit 096d184b4b
2 changed files with 30 additions and 5 deletions

View File

@ -207,7 +207,7 @@ export default function wfAlign (s1, s2, penalties, doCIGAR = false) {
} }
let CIGAR = null; let CIGAR = null;
if (doCIGAR) { if (doCIGAR) {
CIGAR = wfBacktrace(M, I, D, score, penalties, A_k, A_offset); CIGAR = wfBacktrace(M, I, D, score, penalties, A_k, s1, s2);
} }
return { score, CIGAR }; return { score, CIGAR };
} }
@ -274,7 +274,7 @@ function wfNext (M, I, D, score, penalties, do_traceback) {
} }
} }
function wfBacktrace (M, I, D, score, penalties, A_k) { function wfBacktrace (M, I, D, score, penalties, A_k, s1, s2) {
const traceback_CIGAR = ["I", "I", "D", "D", "X", "", "", ""]; const traceback_CIGAR = ["I", "I", "D", "D", "X", "", "", ""];
const x = penalties.x; const x = penalties.x;
const o = penalties.o; const o = penalties.o;
@ -327,5 +327,30 @@ function wfBacktrace (M, I, D, score, penalties, A_k) {
break; break;
} }
} }
return Array.from(CIGAR_rev).reverse().join(""); const CIGAR_part = Array.from(CIGAR_rev).reverse(); // still missing Match positions
let c = 0;
let i = 0;
let j = 0;
while (i < s1.length && j < s2.length) { // iterate through the strings to back-solve match positions
if (s1[i] === s2[j]) { // match, insert M and then increment c, i, j
CIGAR_part.splice(c, 0, "M");
c++;
i++;
j++;
}
else if (CIGAR_part[c] === "X") { // mismatch, increment c, i, j
c++;
i++;
j++;
}
else if (CIGAR_part[c] === "I") { // insertion of character to s1 to reach s2, increment c,j
c++;
j++;
}
else if (CIGAR_part[c] === "D") { // deletion of character from s1 to reach s2, increment c,i
c++;
i++;
}
}
return CIGAR_part.join("");
} }

View File

@ -1,4 +1,4 @@
import wf_align from "../src/wfa.js"; import wfAlign from "../src/wfa.js";
import fs from "fs"; import fs from "fs";
import ProgressBar from "progress"; import ProgressBar from "progress";
@ -21,7 +21,7 @@ for (const test_name of Object.keys(data)) {
const s1 = sequences[i].replace(">"); const s1 = sequences[i].replace(">");
const s2 = sequences[i + 1].replace("<"); const s2 = sequences[i + 1].replace("<");
const start = process.hrtime()[1]; const start = process.hrtime()[1];
const { score } = wf_align(s1, s2, penalties, false); const { score } = wfAlign(s1, s2, penalties, false);
const elapsed = process.hrtime()[1] - start; const elapsed = process.hrtime()[1] - start;
timePerChar.push((elapsed / 1e9) / (s1.length + s2.length)); timePerChar.push((elapsed / 1e9) / (s1.length + s2.length));
const solution_score = Number(solutions[j].split("\t")[0]); const solution_score = Number(solutions[j].split("\t")[0]);