2024-10-24 18:07:10 +00:00
|
|
|
package tests
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bufio"
|
|
|
|
"encoding/json"
|
2024-11-07 19:01:01 +00:00
|
|
|
"log"
|
2024-11-05 18:28:28 +00:00
|
|
|
"math/rand/v2"
|
2024-10-24 18:07:10 +00:00
|
|
|
"os"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
"testing"
|
|
|
|
wfa "wfa/pkg"
|
|
|
|
|
|
|
|
"github.com/schollz/progressbar/v3"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Fixture locations used by TestWFA.
const (
	// testJsonPath is the JSON file that maps a test name to its TestCase.
	testJsonPath = "tests.json"
	// testSequences is the file the input sequences are read from.
	testSequences = "sequences"
)
|
|
|
|
|
|
|
|
// TestPenalty is the JSON form of one penalty set, decoded from the
// "penalties" object of a test case. TestWFA copies the four fields
// one-to-one into a wfa.Penalty.
type TestPenalty struct {
	M int `json:"m"` // read from key "m"; added per 'M' op by GetScoreFromCIGAR
	X int `json:"x"` // read from key "x"; added per 'X' op by GetScoreFromCIGAR
	O int `json:"o"` // read from key "o"; added once at the start of each 'I'/'D' run
	E int `json:"e"` // read from key "e"; added for every 'I'/'D' op
}
|
|
|
|
|
|
|
|
type TestCase struct {
|
|
|
|
Penalties TestPenalty `json:"penalties"`
|
|
|
|
Solutions string `json:"solutions"`
|
|
|
|
}
|
|
|
|
|
2024-11-05 18:28:28 +00:00
|
|
|
func randRange(min, max int) uint32 {
|
|
|
|
return uint32(rand.IntN(max-min) + min)
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestWavefrontPacking(t *testing.T) {
|
|
|
|
for range 1000 {
|
|
|
|
val := randRange(0, 1000)
|
|
|
|
tb := wfa.Traceback(randRange(0, 7))
|
|
|
|
v := wfa.PackWavefrontValue(val, tb)
|
|
|
|
|
|
|
|
valid, gotVal, gotTB := wfa.UnpackWavefrontValue(v)
|
|
|
|
|
|
|
|
if !valid || gotVal != val || gotTB != tb {
|
|
|
|
t.Errorf(`test WavefrontPack/Unpack, val: %d, tb: %d, packedval: %x, gotok: %t, gotval: %d, gottb: %d\n`, val, tb, v, valid, gotVal, gotTB)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-11-07 19:01:01 +00:00
|
|
|
func GetScoreFromCIGAR(CIGAR string, penalties wfa.Penalty) int {
|
|
|
|
unpackedCIGAR := wfa.RunLengthDecode(CIGAR)
|
|
|
|
previousOp := '~'
|
|
|
|
score := 0
|
|
|
|
for _, Op := range unpackedCIGAR {
|
|
|
|
if Op == 'M' {
|
|
|
|
score = score + penalties.M
|
|
|
|
} else if Op == 'X' {
|
|
|
|
score = score + penalties.X
|
|
|
|
} else if (Op == 'I' && previousOp != 'I') || (Op == 'D' && previousOp != 'D') {
|
|
|
|
score = score + penalties.O + penalties.E
|
|
|
|
} else if (Op == 'I' && previousOp == 'I') || (Op == 'D' && previousOp == 'D') {
|
|
|
|
score = score + penalties.E
|
|
|
|
}
|
|
|
|
previousOp = Op
|
|
|
|
}
|
|
|
|
return score
|
|
|
|
}
|
|
|
|
|
|
|
|
func CheckCIGARCorrectness(s1 string, s2 string, CIGAR string) bool {
|
|
|
|
unpackedCIGAR := wfa.RunLengthDecode(CIGAR)
|
|
|
|
i := 0
|
|
|
|
j := 0
|
|
|
|
|
|
|
|
s1Aligned := strings.Builder{}
|
|
|
|
alignment := strings.Builder{}
|
|
|
|
s2Aligned := strings.Builder{}
|
|
|
|
|
|
|
|
for c := 0; c < len(unpackedCIGAR); c++ {
|
|
|
|
Op := unpackedCIGAR[c]
|
|
|
|
if Op == 'M' {
|
|
|
|
s1Aligned.WriteByte(s1[i])
|
|
|
|
alignment.WriteRune('|')
|
|
|
|
s2Aligned.WriteByte(s2[j])
|
|
|
|
i++
|
|
|
|
j++
|
|
|
|
} else if Op == 'X' {
|
|
|
|
s1Aligned.WriteByte(s1[i])
|
|
|
|
alignment.WriteRune(' ')
|
|
|
|
s2Aligned.WriteByte(s2[j])
|
|
|
|
i++
|
|
|
|
j++
|
|
|
|
} else if Op == 'I' {
|
|
|
|
|
|
|
|
s1Aligned.WriteRune('-')
|
|
|
|
alignment.WriteRune(' ')
|
|
|
|
s2Aligned.WriteByte(s2[j])
|
|
|
|
|
|
|
|
j++
|
|
|
|
} else if Op == 'D' {
|
|
|
|
s1Aligned.WriteByte(s1[i])
|
|
|
|
alignment.WriteRune('|')
|
|
|
|
s2Aligned.WriteRune('-')
|
|
|
|
i++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if i == len(s1) && j == len(s2) {
|
|
|
|
return true
|
|
|
|
} else {
|
|
|
|
log.Printf("\n%s\n%s\n%s\n i=%d, j=%d, |s1|=%d, |s2|=%d\n", s1Aligned.String(), alignment.String(), s2Aligned.String(), i, j, len(s1), len(s2))
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-10-24 18:07:10 +00:00
|
|
|
func TestWFA(t *testing.T) {
|
|
|
|
content, _ := os.ReadFile(testJsonPath)
|
|
|
|
|
|
|
|
var testMap map[string]TestCase
|
|
|
|
json.Unmarshal(content, &testMap)
|
|
|
|
|
|
|
|
for k, v := range testMap {
|
|
|
|
testName := k
|
|
|
|
|
|
|
|
testPenalties := wfa.Penalty{
|
|
|
|
M: v.Penalties.M,
|
|
|
|
X: v.Penalties.X,
|
|
|
|
O: v.Penalties.O,
|
|
|
|
E: v.Penalties.E,
|
|
|
|
}
|
|
|
|
|
|
|
|
sequencesFile, _ := os.Open(testSequences)
|
|
|
|
sequences := bufio.NewScanner(sequencesFile)
|
|
|
|
solutionsFile, _ := os.Open(v.Solutions)
|
|
|
|
solutions := bufio.NewScanner(solutionsFile)
|
|
|
|
|
|
|
|
bar := progressbar.Default(305, k)
|
|
|
|
|
|
|
|
idx := 0
|
|
|
|
|
|
|
|
for solutions.Scan() {
|
|
|
|
solution := solutions.Text()
|
2024-11-07 19:01:01 +00:00
|
|
|
|
2024-10-24 18:07:10 +00:00
|
|
|
expectedScore, _ := strconv.Atoi(strings.Split(solution, "\t")[0])
|
2024-11-07 19:01:01 +00:00
|
|
|
expectedCIGAR := strings.Split(solution, "\t")[1]
|
2024-10-24 18:07:10 +00:00
|
|
|
|
|
|
|
sequences.Scan()
|
|
|
|
s1 := sequences.Text()
|
|
|
|
s1 = s1[1:]
|
|
|
|
|
|
|
|
sequences.Scan()
|
|
|
|
s2 := sequences.Text()
|
|
|
|
s2 = s2[1:]
|
|
|
|
|
2024-11-05 05:35:46 +00:00
|
|
|
x := wfa.WFAlign(s1, s2, testPenalties, true)
|
2024-10-24 18:07:10 +00:00
|
|
|
gotScore := x.Score
|
2024-11-07 19:01:01 +00:00
|
|
|
gotCIGAR := x.CIGAR
|
2024-10-24 18:07:10 +00:00
|
|
|
|
|
|
|
if gotScore != -1*expectedScore {
|
2024-11-07 19:01:01 +00:00
|
|
|
t.Errorf(`test: %s#%d, s1: %s, s2: %s, got: %d, expected: %d`, testName, idx, s1, s2, gotScore, expectedScore)
|
|
|
|
os.Exit(1)
|
|
|
|
}
|
|
|
|
|
|
|
|
if gotCIGAR != expectedCIGAR {
|
|
|
|
checkScore := GetScoreFromCIGAR(gotCIGAR, testPenalties)
|
|
|
|
CIGARCorrectness := CheckCIGARCorrectness(s1, s2, gotCIGAR)
|
|
|
|
if checkScore != gotScore && checkScore != -1*expectedScore { // nonequivalent alignment
|
|
|
|
t.Errorf(`test: %s#%d, s1: %s, s2: %s, got: [%s], expected: [%s]`, testName, idx, s1, s2, gotCIGAR, expectedCIGAR)
|
|
|
|
t.Errorf(`test: %s#%d, recalculated score: %d`, testName, idx, checkScore)
|
|
|
|
os.Exit(1)
|
|
|
|
}
|
|
|
|
if !CIGARCorrectness {
|
|
|
|
t.Errorf(`test: %s#%d, s1: %s, s2: %s, got: [%s], expected: [%s]`, testName, idx, s1, s2, gotCIGAR, expectedCIGAR)
|
|
|
|
os.Exit(1)
|
|
|
|
}
|
2024-10-24 18:07:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
idx++
|
|
|
|
bar.Add(1)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|