Compare commits
5 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
c94a7902c0 | ||
8ccb45748d | |||
63a375995c | |||
2351faf2d7 | |||
a446bbd923 |
10
Makefile
10
Makefile
@@ -1,9 +1,9 @@
|
|||||||
.PHONY: build clean test
|
.PHONY: build clean test dev-init
|
||||||
|
|
||||||
build: clean
|
build: clean
|
||||||
@echo "======================== Building Binary ======================="
|
@echo "======================== Building Binary ======================="
|
||||||
minify wfa.js > dist/wfa.js
|
minify wfa.js > dist/wfa.js
|
||||||
GOOS=js GOARCH=wasm CGO_ENABLED=0 tinygo build -panic=trap -no-debug -opt=2 -target=wasm -o dist/wfa.wasm .
|
GOOS=js GOARCH=wasm CGO_ENABLED=0 tinygo build -panic=trap -no-debug -opt=s -target=wasm -o dist/wfa.wasm .
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
@echo "======================== Cleaning Project ======================"
|
@echo "======================== Cleaning Project ======================"
|
||||||
@@ -23,4 +23,8 @@ test:
|
|||||||
go tool pprof -top mem.prof
|
go tool pprof -top mem.prof
|
||||||
@rm -f mem.prof
|
@rm -f mem.prof
|
||||||
|
|
||||||
@rm -f test.test
|
@rm -f test.test
|
||||||
|
|
||||||
|
dev-init:
|
||||||
|
apt install minify
|
||||||
|
go get -t wfa/test
|
@@ -1,6 +1,6 @@
|
|||||||
# Using WFA-JS
|
# Using WFA-JS
|
||||||
|
|
||||||
Download `wfa.js` and `wfa.wasm`from [releases](https://git.tronnet.net/tronnet/WFA-JS/releases) to your project. Add to your script:
|
Download `wfa.js` and `wfa.wasm`from [releases](https://git.tronnet.net/alu/WFA-JS/releases) to your project. Add to your script:
|
||||||
|
|
||||||
```
|
```
|
||||||
import wfa from "./wfa.js"
|
import wfa from "./wfa.js"
|
||||||
|
4
go.mod
4
go.mod
@@ -1,10 +1,10 @@
|
|||||||
module wfa
|
module wfa
|
||||||
|
|
||||||
go 1.23.2
|
go 1.25.1
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/schollz/progressbar/v3 v3.17.1
|
github.com/schollz/progressbar/v3 v3.17.1
|
||||||
golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f
|
golang.org/x/exp v0.0.0-20250911091902-df9299821621
|
||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
|
31
index.html
Normal file
31
index.html
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<script src="dist/wfa.js" type="module"></script>
|
||||||
|
<script type="module">
|
||||||
|
import wfaInit from "./dist/wfa.js";
|
||||||
|
window.addEventListener("DOMContentLoaded", () => {
|
||||||
|
wfaInit("dist/wfa.wasm");
|
||||||
|
document.querySelector("#submit").addEventListener("click", () => {
|
||||||
|
a = document.querySelector("#a").value
|
||||||
|
b = document.querySelector("#b").value
|
||||||
|
const penalties = {
|
||||||
|
m: 0,
|
||||||
|
x: 1,
|
||||||
|
o: 0,
|
||||||
|
e: 1
|
||||||
|
};
|
||||||
|
const { score, CIGAR } = global.wfAlign(a, b, penalties, true);
|
||||||
|
const alignment = global.DecodeCIGAR(CIGAR);
|
||||||
|
document.querySelector("#result").innerText = `${score}, ${CIGAR}, ${alignment}`;
|
||||||
|
})
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<label>A: </label><input id="a">
|
||||||
|
<label>B: </label><input id="b">
|
||||||
|
<button id="submit">Submit</button>
|
||||||
|
<p><span>Result: </span><span id="result"></span></p>
|
||||||
|
</body>
|
||||||
|
</html>
|
@@ -6,38 +6,31 @@ type PositiveSlice[T any] struct {
|
|||||||
defaultValue T
|
defaultValue T
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *PositiveSlice[T]) TranslateIndex(idx int) int {
|
|
||||||
return idx
|
|
||||||
}
|
|
||||||
|
|
||||||
func (a *PositiveSlice[T]) Valid(idx int) bool {
|
func (a *PositiveSlice[T]) Valid(idx int) bool {
|
||||||
actualIdx := a.TranslateIndex(idx)
|
return 0 <= idx && idx < len(a.valid) && a.valid[idx]
|
||||||
return 0 <= actualIdx && actualIdx < len(a.valid) && a.valid[actualIdx]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *PositiveSlice[T]) Get(idx int) T {
|
func (a *PositiveSlice[T]) Get(idx int) T {
|
||||||
actualIdx := a.TranslateIndex(idx)
|
if 0 <= idx && idx < len(a.valid) && a.valid[idx] { // idx is in the slice
|
||||||
if 0 <= actualIdx && actualIdx < len(a.valid) && a.valid[actualIdx] { // idx is in the slice
|
return a.data[idx]
|
||||||
return a.data[actualIdx]
|
|
||||||
} else { // idx is out of the slice
|
} else { // idx is out of the slice
|
||||||
return a.defaultValue
|
return a.defaultValue
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *PositiveSlice[T]) Set(idx int, value T) {
|
func (a *PositiveSlice[T]) Set(idx int, value T) {
|
||||||
actualIdx := a.TranslateIndex(idx)
|
if idx >= len(a.valid) { // idx is outside the slice
|
||||||
if actualIdx < 0 || actualIdx >= len(a.valid) { // idx is outside the slice
|
// expand data array to 2*idx
|
||||||
// expand data array to actualIdx
|
newData := make([]T, 2*idx+1)
|
||||||
newData := make([]T, 2*actualIdx+1)
|
|
||||||
copy(newData, a.data)
|
copy(newData, a.data)
|
||||||
a.data = newData
|
a.data = newData
|
||||||
|
|
||||||
// expand valid array to actualIdx
|
// expand valid array to 2*idx
|
||||||
newValid := make([]bool, 2*actualIdx+1)
|
newValid := make([]bool, 2*idx+1)
|
||||||
copy(newValid, a.valid)
|
copy(newValid, a.valid)
|
||||||
a.valid = newValid
|
a.valid = newValid
|
||||||
}
|
}
|
||||||
|
|
||||||
a.data[actualIdx] = value
|
a.data[idx] = value
|
||||||
a.valid[actualIdx] = true
|
a.valid[idx] = true
|
||||||
}
|
}
|
||||||
|
36
pkg/types.go
36
pkg/types.go
@@ -1,5 +1,9 @@
|
|||||||
package wfa
|
package wfa
|
||||||
|
|
||||||
|
type Integer interface {
|
||||||
|
~int | ~int8 | ~int16 | ~int32 | ~int64 | ~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~uintptr
|
||||||
|
}
|
||||||
|
|
||||||
type Result struct {
|
type Result struct {
|
||||||
Score int
|
Score int
|
||||||
CIGAR string
|
CIGAR string
|
||||||
@@ -40,25 +44,22 @@ func UnpackWavefrontLoHi(lohi WavefrontLoHi) (int, int) {
|
|||||||
return loBM, hiBM
|
return loBM, hiBM
|
||||||
}
|
}
|
||||||
|
|
||||||
// bitpacked wavefront values with 1 valid bit, 3 traceback bits, and 28 bits for the diag distance
|
// bitpacked wavefront values with 1 valid bit, 3 traceback bits, and 60 bits for the diag distance
|
||||||
// technically this restricts to alignments with less than 268 million characters but that should be sufficient for most cases
|
type WavefrontValue uint64
|
||||||
type WavefrontValue uint32
|
|
||||||
|
|
||||||
// TODO: add 64 bit packed value in case more than 268 million characters are needed
|
|
||||||
|
|
||||||
// PackWavefrontValue: packs a diag value and traceback into a WavefrontValue
|
// PackWavefrontValue: packs a diag value and traceback into a WavefrontValue
|
||||||
func PackWavefrontValue(value uint32, traceback Traceback) WavefrontValue {
|
func PackWavefrontValue(value uint64, traceback Traceback) WavefrontValue {
|
||||||
validBM := uint32(0x8000_0000)
|
validBM := uint64(0x8000_0000_0000_0000)
|
||||||
tracebackBM := uint32(traceback&0x0000_0007) << 28
|
tracebackBM := uint64(traceback&0x0000_0007) << 60
|
||||||
valueBM := value & 0x0FFF_FFFF
|
valueBM := uint64(value) & 0x0FFF_FFFF_FFFF_FFFF
|
||||||
return WavefrontValue(validBM | tracebackBM | valueBM)
|
return WavefrontValue(validBM | tracebackBM | valueBM)
|
||||||
}
|
}
|
||||||
|
|
||||||
// UnpackWavefrontValue: opens a WavefrontValue into a valid bool, diag value and traceback
|
// UnpackWavefrontValue: opens a WavefrontValue into a valid bool, diag value and traceback
|
||||||
func UnpackWavefrontValue(wfv WavefrontValue) (bool, uint32, Traceback) {
|
func UnpackWavefrontValue(wfv WavefrontValue) (bool, uint64, Traceback) {
|
||||||
validBM := wfv&0x8000_0000 != 0
|
validBM := wfv&0x8000_0000_0000_0000 != 0
|
||||||
tracebackBM := uint8(wfv & 0x7000_0000 >> 28)
|
tracebackBM := uint8(wfv & 0x7000_0000_0000_0000 >> 60)
|
||||||
valueBM := uint32(wfv & 0x0FFF_FFFF)
|
valueBM := uint64(wfv & 0x0000_0000_FFFF_FFFF)
|
||||||
return validBM, valueBM, Traceback(tracebackBM)
|
return validBM, valueBM, Traceback(tracebackBM)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -71,12 +72,9 @@ type Wavefront struct { // since wavefronts store diag distance, they should nev
|
|||||||
// NewWavefront: returns a new wavefront with size accomodating lo and hi (inclusive)
|
// NewWavefront: returns a new wavefront with size accomodating lo and hi (inclusive)
|
||||||
func NewWavefront(lo int, hi int) *Wavefront {
|
func NewWavefront(lo int, hi int) *Wavefront {
|
||||||
a := &Wavefront{}
|
a := &Wavefront{}
|
||||||
|
|
||||||
a.lohi = PackWavefrontLoHi(lo, hi)
|
a.lohi = PackWavefrontLoHi(lo, hi)
|
||||||
size := hi - lo
|
size := hi - lo
|
||||||
|
a.data = make([]WavefrontValue, size+1)
|
||||||
newData := make([]WavefrontValue, size+1)
|
|
||||||
a.data = newData
|
|
||||||
|
|
||||||
return a
|
return a
|
||||||
}
|
}
|
||||||
@@ -134,12 +132,12 @@ func NewWavefrontComponent() *WavefrontComponent {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// GetVal: get value for wavefront=score, diag=k => returns ok, value, traceback
|
// GetVal: get value for wavefront=score, diag=k => returns ok, value, traceback
|
||||||
func (w *WavefrontComponent) GetVal(score int, k int) (bool, uint32, Traceback) {
|
func (w *WavefrontComponent) GetVal(score int, k int) (bool, uint64, Traceback) {
|
||||||
return UnpackWavefrontValue(w.W.Get(score).Get(k))
|
return UnpackWavefrontValue(w.W.Get(score).Get(k))
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetVal: set value, traceback for wavefront=score, diag=k
|
// SetVal: set value, traceback for wavefront=score, diag=k
|
||||||
func (w *WavefrontComponent) SetVal(score int, k int, val uint32, tb Traceback) {
|
func (w *WavefrontComponent) SetVal(score int, k int, val uint64, tb Traceback) {
|
||||||
w.W.Get(score).Set(k, PackWavefrontValue(val, tb))
|
w.W.Get(score).Set(k, PackWavefrontValue(val, tb))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
82
pkg/utils.go
82
pkg/utils.go
@@ -1,23 +1,18 @@
|
|||||||
package wfa
|
package wfa
|
||||||
|
|
||||||
import (
|
const MaxInt = int(^uint(0) >> 1)
|
||||||
"math"
|
const MinInt = -MaxInt - 1
|
||||||
"strings"
|
|
||||||
|
|
||||||
"golang.org/x/exp/constraints"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
// convert an unsigned into to string
|
||||||
func UIntToString(num uint) string { // num assumed to be positive
|
func UIntToString(num uint) string { // num assumed to be positive
|
||||||
var builder strings.Builder
|
str := []rune{}
|
||||||
|
|
||||||
for num > 0 {
|
for num > 0 {
|
||||||
digit := num % 10
|
digit := num % 10
|
||||||
builder.WriteRune(rune('0' + digit))
|
str = append(str, rune('0'+digit))
|
||||||
num /= 10
|
num /= 10
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reverse the string as we built it in reverse order
|
|
||||||
str := []rune(builder.String())
|
|
||||||
for i, j := 0, len(str)-1; i < j; i, j = i+1, j-1 {
|
for i, j := 0, len(str)-1; i < j; i, j = i+1, j-1 {
|
||||||
str[i], str[j] = str[j], str[i]
|
str[i], str[j] = str[j], str[i]
|
||||||
}
|
}
|
||||||
@@ -25,8 +20,9 @@ func UIntToString(num uint) string { // num assumed to be positive
|
|||||||
return string(str)
|
return string(str)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// decode runlength encoded string such as CIGARs
|
||||||
func RunLengthDecode(encoded string) string {
|
func RunLengthDecode(encoded string) string {
|
||||||
decoded := strings.Builder{}
|
decoded := []rune{}
|
||||||
length := len(encoded)
|
length := len(encoded)
|
||||||
i := 0
|
i := 0
|
||||||
|
|
||||||
@@ -42,45 +38,30 @@ func RunLengthDecode(encoded string) string {
|
|||||||
if i < length {
|
if i < length {
|
||||||
char := encoded[i]
|
char := encoded[i]
|
||||||
for j := 0; j < runLength; j++ {
|
for j := 0; j < runLength; j++ {
|
||||||
decoded.WriteByte(char)
|
decoded = append(decoded, rune(char))
|
||||||
}
|
}
|
||||||
i++ // Move past the character
|
i++ // Move past the character
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return decoded.String()
|
return string(decoded)
|
||||||
}
|
}
|
||||||
|
|
||||||
func SafeMin[T constraints.Integer](values []T, idx int) T {
|
// given the min index, return the item in values at that index
|
||||||
|
func SafeMin[T Integer](values []T, idx int) T {
|
||||||
return values[idx]
|
return values[idx]
|
||||||
}
|
}
|
||||||
|
|
||||||
func SafeMax[T constraints.Integer](values []T, idx int) T {
|
// given the max index, return the item in values at that index
|
||||||
|
func SafeMax[T Integer](values []T, idx int) T {
|
||||||
return values[idx]
|
return values[idx]
|
||||||
}
|
}
|
||||||
|
|
||||||
func SafeArgMax[T constraints.Integer](valids []bool, values []T) (bool, int) {
|
// given array of values and corresponding array of valid flags, find the min of value which is valid or return false if there does not exist any
|
||||||
hasValid := false
|
func SafeArgMin[T Integer](valids []bool, values []T) (bool, int) {
|
||||||
maxIndex := 0
|
|
||||||
maxValue := math.MinInt
|
|
||||||
for i := 0; i < len(valids); i++ {
|
|
||||||
if valids[i] && int(values[i]) > maxValue {
|
|
||||||
hasValid = true
|
|
||||||
maxIndex = i
|
|
||||||
maxValue = int(values[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if hasValid {
|
|
||||||
return true, maxIndex
|
|
||||||
} else {
|
|
||||||
return false, 0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func SafeArgMin[T constraints.Integer](valids []bool, values []T) (bool, int) {
|
|
||||||
hasValid := false
|
hasValid := false
|
||||||
minIndex := 0
|
minIndex := 0
|
||||||
minValue := math.MaxInt
|
minValue := MaxInt
|
||||||
for i := 0; i < len(valids); i++ {
|
for i := 0; i < len(valids); i++ {
|
||||||
if valids[i] && int(values[i]) < minValue {
|
if valids[i] && int(values[i]) < minValue {
|
||||||
hasValid = true
|
hasValid = true
|
||||||
@@ -95,6 +76,22 @@ func SafeArgMin[T constraints.Integer](valids []bool, values []T) (bool, int) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// given array of values and corresponding array of valid flags, find the max of value which is valid or return false if there does not exist any
|
||||||
|
func SafeArgMax[T Integer](valids []bool, values []T) (bool, int) {
|
||||||
|
hasValid := false
|
||||||
|
maxIndex := 0
|
||||||
|
maxValue := MinInt
|
||||||
|
for i := range valids {
|
||||||
|
if valids[i] && int(values[i]) > maxValue {
|
||||||
|
hasValid = true
|
||||||
|
maxIndex = i
|
||||||
|
maxValue = int(values[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return hasValid, maxIndex
|
||||||
|
}
|
||||||
|
|
||||||
|
// set the lext lo and hi bounds for wavefronts M, I, D
|
||||||
func NextLoHi(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent, score int, penalties Penalty) (int, int) {
|
func NextLoHi(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent, score int, penalties Penalty) (int, int) {
|
||||||
x := penalties.X
|
x := penalties.X
|
||||||
o := penalties.O
|
o := penalties.O
|
||||||
@@ -125,6 +122,7 @@ func NextLoHi(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponen
|
|||||||
return lo, hi
|
return lo, hi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// set the traceback and diag value for the next I wavefront
|
||||||
func NextI(M *WavefrontComponent, I *WavefrontComponent, score int, k int, penalties Penalty) {
|
func NextI(M *WavefrontComponent, I *WavefrontComponent, score int, k int, penalties Penalty) {
|
||||||
o := penalties.O
|
o := penalties.O
|
||||||
e := penalties.E
|
e := penalties.E
|
||||||
@@ -132,13 +130,14 @@ func NextI(M *WavefrontComponent, I *WavefrontComponent, score int, k int, penal
|
|||||||
a_ok, a, _ := M.GetVal(score-o-e, k-1)
|
a_ok, a, _ := M.GetVal(score-o-e, k-1)
|
||||||
b_ok, b, _ := I.GetVal(score-e, k-1)
|
b_ok, b, _ := I.GetVal(score-e, k-1)
|
||||||
|
|
||||||
ok, nextITraceback := SafeArgMax([]bool{a_ok, b_ok}, []uint32{a, b})
|
ok, nextITraceback := SafeArgMax([]bool{a_ok, b_ok}, []uint64{a, b})
|
||||||
nextIVal := SafeMax([]uint32{a, b}, nextITraceback) + 1 // important that the +1 is here
|
nextIVal := SafeMax([]uint64{a, b}, nextITraceback) + 1 // important that the +1 is here
|
||||||
if ok {
|
if ok {
|
||||||
I.SetVal(score, k, nextIVal, []Traceback{OpenIns, ExtdIns}[nextITraceback])
|
I.SetVal(score, k, nextIVal, []Traceback{OpenIns, ExtdIns}[nextITraceback])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// set the traceback and diag value for the next D wavefront
|
||||||
func NextD(M *WavefrontComponent, D *WavefrontComponent, score int, k int, penalties Penalty) {
|
func NextD(M *WavefrontComponent, D *WavefrontComponent, score int, k int, penalties Penalty) {
|
||||||
o := penalties.O
|
o := penalties.O
|
||||||
e := penalties.E
|
e := penalties.E
|
||||||
@@ -146,13 +145,14 @@ func NextD(M *WavefrontComponent, D *WavefrontComponent, score int, k int, penal
|
|||||||
a_ok, a, _ := M.GetVal(score-o-e, k+1)
|
a_ok, a, _ := M.GetVal(score-o-e, k+1)
|
||||||
b_ok, b, _ := D.GetVal(score-e, k+1)
|
b_ok, b, _ := D.GetVal(score-e, k+1)
|
||||||
|
|
||||||
ok, nextDTraceback := SafeArgMax([]bool{a_ok, b_ok}, []uint32{a, b})
|
ok, nextDTraceback := SafeArgMax([]bool{a_ok, b_ok}, []uint64{a, b})
|
||||||
nextDVal := SafeMax([]uint32{a, b}, nextDTraceback)
|
nextDVal := SafeMax([]uint64{a, b}, nextDTraceback)
|
||||||
if ok {
|
if ok {
|
||||||
D.SetVal(score, k, nextDVal, []Traceback{OpenDel, ExtdDel}[nextDTraceback])
|
D.SetVal(score, k, nextDVal, []Traceback{OpenDel, ExtdDel}[nextDTraceback])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// set the traceback and diag value for the next M wavefront
|
||||||
func NextM(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent, score int, k int, penalties Penalty) {
|
func NextM(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent, score int, k int, penalties Penalty) {
|
||||||
x := penalties.X
|
x := penalties.X
|
||||||
|
|
||||||
@@ -161,8 +161,8 @@ func NextM(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent,
|
|||||||
b_ok, b, _ := I.GetVal(score, k)
|
b_ok, b, _ := I.GetVal(score, k)
|
||||||
c_ok, c, _ := D.GetVal(score, k)
|
c_ok, c, _ := D.GetVal(score, k)
|
||||||
|
|
||||||
ok, nextMTraceback := SafeArgMax([]bool{a_ok, b_ok, c_ok}, []uint32{a, b, c})
|
ok, nextMTraceback := SafeArgMax([]bool{a_ok, b_ok, c_ok}, []uint64{a, b, c})
|
||||||
nextMVal := SafeMax([]uint32{a, b, c}, nextMTraceback)
|
nextMVal := SafeMax([]uint64{a, b, c}, nextMTraceback)
|
||||||
if ok {
|
if ok {
|
||||||
M.SetVal(score, k, nextMVal, []Traceback{Sub, Ins, Del}[nextMTraceback])
|
M.SetVal(score, k, nextMVal, []Traceback{Sub, Ins, Del}[nextMTraceback])
|
||||||
}
|
}
|
||||||
|
44
pkg/wfa.go
44
pkg/wfa.go
@@ -1,14 +1,11 @@
|
|||||||
package wfa
|
package wfa
|
||||||
|
|
||||||
import (
|
// WFAlign takes strings s1, s2, penalties, and returns the score and CIGAR if doCIGAR is true
|
||||||
"strings"
|
|
||||||
)
|
|
||||||
|
|
||||||
func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result {
|
func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result {
|
||||||
n := len(s1)
|
n := len(s1)
|
||||||
m := len(s2)
|
m := len(s2)
|
||||||
A_k := m - n
|
A_k := m - n // diagonal where both sequences end
|
||||||
A_offset := uint32(m)
|
A_offset := uint64(m) // offset along a_k diagonal corresponding to end
|
||||||
score := 0
|
score := 0
|
||||||
M := NewWavefrontComponent()
|
M := NewWavefrontComponent()
|
||||||
M.SetLoHi(0, 0, 0)
|
M.SetLoHi(0, 0, 0)
|
||||||
@@ -19,7 +16,7 @@ func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result {
|
|||||||
for {
|
for {
|
||||||
WFExtend(M, s1, n, s2, m, score)
|
WFExtend(M, s1, n, s2, m, score)
|
||||||
ok, val, _ := M.GetVal(score, A_k)
|
ok, val, _ := M.GetVal(score, A_k)
|
||||||
if ok && val >= A_offset {
|
if ok && val >= A_offset { // exit when M_(s,a_k) >= A_offset, ie the wavefront has reached the end
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
score = score + 1
|
score = score + 1
|
||||||
@@ -27,7 +24,7 @@ func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result {
|
|||||||
}
|
}
|
||||||
|
|
||||||
CIGAR := ""
|
CIGAR := ""
|
||||||
if doCIGAR {
|
if doCIGAR { // if doCIGAR, then perform backtrace, otherwise just return the score
|
||||||
CIGAR = WFBacktrace(M, I, D, score, penalties, A_k, A_offset, s1, s2)
|
CIGAR = WFBacktrace(M, I, D, score, penalties, A_k, A_offset, s1, s2)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -39,23 +36,24 @@ func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result {
|
|||||||
|
|
||||||
func WFExtend(M *WavefrontComponent, s1 string, n int, s2 string, m int, score int) {
|
func WFExtend(M *WavefrontComponent, s1 string, n int, s2 string, m int, score int) {
|
||||||
_, lo, hi := M.GetLoHi(score)
|
_, lo, hi := M.GetLoHi(score)
|
||||||
for k := lo; k <= hi; k++ {
|
for k := lo; k <= hi; k++ { // for each diagonal in current wavefront
|
||||||
// v = M[score][k] - k
|
// v = M[score][k] - k
|
||||||
// h = M[score][k]
|
// h = M[score][k]
|
||||||
ok, hu, _ := M.GetVal(score, k)
|
ok, uh, tb := M.GetVal(score, k)
|
||||||
h := int(hu)
|
// exit early if M_(s,l) is invalid
|
||||||
v := h - k
|
|
||||||
|
|
||||||
// exit early if v or h are invalid
|
|
||||||
if !ok {
|
if !ok {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
for v < n && h < m && s1[v] == s2[h] {
|
h := int(uh)
|
||||||
_, val, tb := M.GetVal(score, k)
|
v := h - k
|
||||||
M.SetVal(score, k, val+1, tb)
|
// in the paper, we do v++, h++, M_(s,k)++
|
||||||
|
// however, note that h = M_(s,k) so instead we just do v++, h++ and set M_(s,k) at the end
|
||||||
|
// this saves a some memory reads and writes
|
||||||
|
for v < n && h < m && s1[v] == s2[h] { // extend diagonal for the next set of matches
|
||||||
v++
|
v++
|
||||||
h++
|
h++
|
||||||
}
|
}
|
||||||
|
M.SetVal(score, k, uint64(h), tb)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -63,14 +61,14 @@ func WFNext(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent,
|
|||||||
// get this score's lo, hi
|
// get this score's lo, hi
|
||||||
lo, hi := NextLoHi(M, I, D, score, penalties)
|
lo, hi := NextLoHi(M, I, D, score, penalties)
|
||||||
|
|
||||||
for k := lo; k <= hi; k++ {
|
for k := lo; k <= hi; k++ { // for each diagonal, extend the matrices for the next wavefronts
|
||||||
NextI(M, I, score, k, penalties)
|
NextI(M, I, score, k, penalties)
|
||||||
NextD(M, D, score, k, penalties)
|
NextD(M, D, score, k, penalties)
|
||||||
NextM(M, I, D, score, k, penalties)
|
NextM(M, I, D, score, k, penalties)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func WFBacktrace(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent, score int, penalties Penalty, A_k int, A_offset uint32, s1 string, s2 string) string {
|
func WFBacktrace(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent, score int, penalties Penalty, A_k int, A_offset uint64, s1 string, s2 string) string {
|
||||||
x := penalties.X
|
x := penalties.X
|
||||||
o := penalties.O
|
o := penalties.O
|
||||||
e := penalties.E
|
e := penalties.E
|
||||||
@@ -187,11 +185,11 @@ func WFBacktrace(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontCompo
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
CIGAR := strings.Builder{}
|
CIGAR := ""
|
||||||
for i := len(Ops) - 1; i > 0; i-- {
|
for i := len(Ops) - 1; i > 0; i-- {
|
||||||
CIGAR.WriteString(UIntToString(Counts[i]))
|
CIGAR += UIntToString(Counts[i])
|
||||||
CIGAR.WriteRune(Ops[i])
|
CIGAR += string(Ops[i])
|
||||||
}
|
}
|
||||||
|
|
||||||
return CIGAR.String()
|
return CIGAR
|
||||||
}
|
}
|
||||||
|
@@ -36,8 +36,8 @@ func randRange[T constraints.Integer](min, max int) T {
|
|||||||
|
|
||||||
func TestWavefrontPacking(t *testing.T) {
|
func TestWavefrontPacking(t *testing.T) {
|
||||||
for range 1000 {
|
for range 1000 {
|
||||||
val := randRange[uint32](0, 1000)
|
val := randRange[uint64](0, 1000)
|
||||||
tb := wfa.Traceback(randRange[uint32](0, 7))
|
tb := wfa.Traceback(randRange[uint64](0, 7))
|
||||||
v := wfa.PackWavefrontValue(val, tb)
|
v := wfa.PackWavefrontValue(val, tb)
|
||||||
|
|
||||||
valid, gotVal, gotTB := wfa.UnpackWavefrontValue(v)
|
valid, gotVal, gotTB := wfa.UnpackWavefrontValue(v)
|
||||||
|
Reference in New Issue
Block a user