Compare commits
9 Commits
Author | SHA1 | Date | |
---|---|---|---|
63a375995c | |||
2351faf2d7 | |||
a446bbd923 | |||
f1407fd045 | |||
ce635cc2b1 | |||
f5d2528e20 | |||
6f78825876 | |||
a878da42a3 | |||
bd720f06fb |
6
Makefile
6
Makefile
@@ -1,4 +1,4 @@
|
||||
.PHONY: build clean test
|
||||
.PHONY: build clean test dev-init
|
||||
|
||||
build: clean
|
||||
@echo "======================== Building Binary ======================="
|
||||
@@ -24,3 +24,7 @@ test:
|
||||
@rm -f mem.prof
|
||||
|
||||
@rm -f test.test
|
||||
|
||||
dev-init:
|
||||
apt install minify
|
||||
go get -t wfa/test
|
@@ -1,11 +1,13 @@
|
||||
# Using WFA-JS
|
||||
|
||||
Download `wfa.js` and `wfa.wasm`from [releases](https://git.tronnet.net/tronnet/WFA-JS/releases) to your project. Add to your script:
|
||||
Download `wfa.js` and `wfa.wasm`from [releases](https://git.tronnet.net/alu/WFA-JS/releases) to your project. Add to your script:
|
||||
|
||||
```
|
||||
import wfa from "./wfa.js"
|
||||
await wfa("<path to wasm>")
|
||||
console.log(wfAlign(...))
|
||||
let result = wfAlign(...)
|
||||
operations = DecodeCIGAR(result.CIGAR)
|
||||
// ...
|
||||
```
|
||||
|
||||
Where `<path to wasm>` is the path from the site root ie. `./scripts/wfa.wasm`. This will depend on your project structure.
|
10
go.mod
10
go.mod
@@ -1,15 +1,15 @@
|
||||
module wfa
|
||||
|
||||
go 1.23.2
|
||||
go 1.23.6
|
||||
|
||||
require (
|
||||
github.com/schollz/progressbar/v3 v3.17.0
|
||||
golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c
|
||||
github.com/schollz/progressbar/v3 v3.17.1
|
||||
golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
|
||||
github.com/rivo/uniseg v0.4.7 // indirect
|
||||
golang.org/x/sys v0.26.0 // indirect
|
||||
golang.org/x/term v0.25.0 // indirect
|
||||
golang.org/x/sys v0.27.0 // indirect
|
||||
golang.org/x/term v0.26.0 // indirect
|
||||
)
|
||||
|
49
main.go
49
main.go
@@ -1,7 +1,6 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"syscall/js"
|
||||
wfa "wfa/pkg"
|
||||
)
|
||||
@@ -15,32 +14,47 @@ func main() {
|
||||
|
||||
func wfAlign(this js.Value, args []js.Value) interface{} {
|
||||
if len(args) != 4 {
|
||||
fmt.Println("invalid number of args, requires 4: s1, s2, penalties, doCIGAR")
|
||||
return nil
|
||||
resultMap := map[string]interface{}{
|
||||
"ok": false,
|
||||
"error": "invalid number of args, requires 4: s1, s2, penalties, doCIGAR",
|
||||
}
|
||||
return js.ValueOf(resultMap)
|
||||
}
|
||||
|
||||
if args[0].Type() != js.TypeString {
|
||||
fmt.Println("s1 should be a string")
|
||||
return nil
|
||||
resultMap := map[string]interface{}{
|
||||
"ok": false,
|
||||
"error": "s1 should be a string",
|
||||
}
|
||||
return js.ValueOf(resultMap)
|
||||
}
|
||||
|
||||
s1 := args[0].String()
|
||||
|
||||
if args[1].Type() != js.TypeString {
|
||||
fmt.Println("s2 should be a string")
|
||||
return nil
|
||||
resultMap := map[string]interface{}{
|
||||
"ok": false,
|
||||
"error": "s2 should be a string",
|
||||
}
|
||||
return js.ValueOf(resultMap)
|
||||
}
|
||||
|
||||
s2 := args[1].String()
|
||||
|
||||
if args[2].Type() != js.TypeObject {
|
||||
fmt.Println("penalties should be a map with key values m, x, o, e")
|
||||
return nil
|
||||
resultMap := map[string]interface{}{
|
||||
"ok": false,
|
||||
"error": "penalties should be a map with key values m, x, o, e",
|
||||
}
|
||||
return js.ValueOf(resultMap)
|
||||
}
|
||||
|
||||
if args[2].Get("m").IsUndefined() || args[2].Get("x").IsUndefined() || args[2].Get("o").IsUndefined() || args[2].Get("e").IsUndefined() {
|
||||
fmt.Println("penalties should be a map with key values m, x, o, e")
|
||||
return nil
|
||||
resultMap := map[string]interface{}{
|
||||
"ok": false,
|
||||
"error": "penalties should be a map with key values m, x, o, e",
|
||||
}
|
||||
return js.ValueOf(resultMap)
|
||||
}
|
||||
|
||||
m := args[2].Get("m").Int()
|
||||
@@ -56,8 +70,11 @@ func wfAlign(this js.Value, args []js.Value) interface{} {
|
||||
}
|
||||
|
||||
if args[3].Type() != js.TypeBoolean {
|
||||
fmt.Println("doCIGAR should be a boolean")
|
||||
return nil
|
||||
resultMap := map[string]interface{}{
|
||||
"ok": false,
|
||||
"error": "doCIGAR should be a boolean",
|
||||
}
|
||||
return js.ValueOf(resultMap)
|
||||
}
|
||||
|
||||
doCIGAR := args[3].Bool()
|
||||
@@ -65,8 +82,10 @@ func wfAlign(this js.Value, args []js.Value) interface{} {
|
||||
// Call the actual func.
|
||||
result := wfa.WFAlign(s1, s2, penalties, doCIGAR)
|
||||
resultMap := map[string]interface{}{
|
||||
"ok": true,
|
||||
"score": result.Score,
|
||||
"CIGAR": result.CIGAR,
|
||||
"error": "",
|
||||
}
|
||||
|
||||
return js.ValueOf(resultMap)
|
||||
@@ -74,12 +93,12 @@ func wfAlign(this js.Value, args []js.Value) interface{} {
|
||||
|
||||
func DecodeCIGAR(this js.Value, args []js.Value) interface{} {
|
||||
if len(args) != 1 {
|
||||
fmt.Println("invalid number of args, requires 1: CIGAR")
|
||||
println("invalid number of args, requires 1: CIGAR")
|
||||
return nil
|
||||
}
|
||||
|
||||
if args[0].Type() != js.TypeString {
|
||||
fmt.Println("CIGAR should be a string")
|
||||
println("CIGAR should be a string")
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@@ -6,50 +6,31 @@ type PositiveSlice[T any] struct {
|
||||
defaultValue T
|
||||
}
|
||||
|
||||
func (a *PositiveSlice[T]) TranslateIndex(idx int) int {
|
||||
return idx
|
||||
}
|
||||
|
||||
func (a *PositiveSlice[T]) Valid(idx int) bool {
|
||||
actualIdx := a.TranslateIndex(idx)
|
||||
return 0 <= actualIdx && actualIdx < len(a.valid) && a.valid[actualIdx]
|
||||
return 0 <= idx && idx < len(a.valid) && a.valid[idx]
|
||||
}
|
||||
|
||||
func (a *PositiveSlice[T]) Get(idx int) T {
|
||||
actualIdx := a.TranslateIndex(idx)
|
||||
if 0 <= actualIdx && actualIdx < len(a.valid) && a.valid[actualIdx] { // idx is in the slice
|
||||
return a.data[actualIdx]
|
||||
if 0 <= idx && idx < len(a.valid) && a.valid[idx] { // idx is in the slice
|
||||
return a.data[idx]
|
||||
} else { // idx is out of the slice
|
||||
return a.defaultValue
|
||||
}
|
||||
}
|
||||
|
||||
func (a *PositiveSlice[T]) Set(idx int, value T) {
|
||||
actualIdx := a.TranslateIndex(idx)
|
||||
if actualIdx < 0 || actualIdx >= len(a.valid) { // idx is outside the slice
|
||||
// expand data array to actualIdx
|
||||
newData := make([]T, 2*actualIdx+1)
|
||||
if idx >= len(a.valid) { // idx is outside the slice
|
||||
// expand data array to 2*idx
|
||||
newData := make([]T, 2*idx+1)
|
||||
copy(newData, a.data)
|
||||
a.data = newData
|
||||
|
||||
// expand valid array to actualIdx
|
||||
newValid := make([]bool, 2*actualIdx+1)
|
||||
// expand valid array to 2*idx
|
||||
newValid := make([]bool, 2*idx+1)
|
||||
copy(newValid, a.valid)
|
||||
a.valid = newValid
|
||||
}
|
||||
|
||||
a.data[actualIdx] = value
|
||||
a.valid[actualIdx] = true
|
||||
}
|
||||
|
||||
func (a *PositiveSlice[T]) Preallocate(hi int) {
|
||||
size := hi
|
||||
|
||||
// expand data array to actualIdx
|
||||
newData := make([]T, size+1)
|
||||
a.data = newData
|
||||
|
||||
// expand valid array to actualIdx
|
||||
newValid := make([]bool, size+1)
|
||||
a.valid = newValid
|
||||
a.data[idx] = value
|
||||
a.valid[idx] = true
|
||||
}
|
||||
|
16
pkg/debug.go
16
pkg/debug.go
@@ -1,3 +1,5 @@
|
||||
//go:build debug
|
||||
|
||||
package wfa
|
||||
|
||||
import (
|
||||
@@ -12,11 +14,13 @@ func (w *WavefrontComponent) String(score int) string {
|
||||
max_hi := math.MinInt
|
||||
|
||||
for i := 0; i <= score; i++ {
|
||||
if w.lo.Valid(i) && w.lo.Get(i) < min_lo {
|
||||
min_lo = w.lo.Get(i)
|
||||
valid := w.W.Valid(i)
|
||||
lo, hi := UnpackWavefrontLoHi(w.W.Get(i).lohi)
|
||||
if valid && lo < min_lo {
|
||||
min_lo = lo
|
||||
}
|
||||
if w.hi.Valid(i) && w.hi.Get(i) > max_hi {
|
||||
max_hi = w.hi.Get(i)
|
||||
if valid && hi > max_hi {
|
||||
max_hi = hi
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40,9 +44,7 @@ func (w *WavefrontComponent) String(score int) string {
|
||||
|
||||
for i := 0; i <= score; i++ {
|
||||
s = s + "["
|
||||
lo := w.lo.Get(i)
|
||||
hi := w.hi.Get(i)
|
||||
// print out wavefront matrix
|
||||
lo, hi := UnpackWavefrontLoHi(w.W.Get(i).lohi)
|
||||
for k := min_lo; k <= max_hi; k++ {
|
||||
valid, val, _ := UnpackWavefrontValue(w.W.Get(i).Get(k))
|
||||
if valid {
|
||||
|
88
pkg/types.go
88
pkg/types.go
@@ -25,49 +25,60 @@ const (
|
||||
End
|
||||
)
|
||||
|
||||
// bitpacked wavefront values with 1 valid bit, 3 traceback bits, and 28 bits for the diag distance
|
||||
// technically this restricts to solutions within 268 million score but that should be sufficient for most cases
|
||||
type WavefrontValue uint32
|
||||
// bitpacked wavefront lo/hi values with 32 bits each
|
||||
type WavefrontLoHi uint64
|
||||
|
||||
// TODO: add 64 bit packed value in case more than 268 million score is needed
|
||||
func PackWavefrontLoHi(lo int, hi int) WavefrontLoHi {
|
||||
loBM := int64(int32(lo)) & 0x0000_0000_FFFF_FFFF
|
||||
hiBM := int64(int64(hi) << 32)
|
||||
return WavefrontLoHi(hiBM | loBM)
|
||||
}
|
||||
|
||||
func UnpackWavefrontLoHi(lohi WavefrontLoHi) (int, int) {
|
||||
loBM := int(int32(lohi & 0x0000_0000_FFFF_FFFF))
|
||||
hiBM := int(int32(lohi & 0xFFFF_FFFF_0000_0000 >> 32))
|
||||
return loBM, hiBM
|
||||
}
|
||||
|
||||
// bitpacked wavefront values with 1 valid bit, 3 traceback bits, and 60 bits for the diag distance
|
||||
type WavefrontValue uint64
|
||||
|
||||
// PackWavefrontValue: packs a diag value and traceback into a WavefrontValue
|
||||
func PackWavefrontValue(value uint32, traceback Traceback) WavefrontValue {
|
||||
valueBM := value & 0x0FFF_FFFF
|
||||
tracebackBM := uint32(traceback&0x0000_0007) << 28
|
||||
return WavefrontValue(0x8000_0000 | valueBM | tracebackBM)
|
||||
func PackWavefrontValue(value uint64, traceback Traceback) WavefrontValue {
|
||||
validBM := uint64(0x8000_0000_0000_0000)
|
||||
tracebackBM := uint64(traceback&0x0000_0007) << 60
|
||||
valueBM := uint64(value) & 0x0FFF_FFFF_FFFF_FFFF
|
||||
return WavefrontValue(validBM | tracebackBM | valueBM)
|
||||
}
|
||||
|
||||
// UnpackWavefrontValue: opens a WavefrontValue into a valid bool, diag value and traceback
|
||||
func UnpackWavefrontValue(wfv WavefrontValue) (bool, uint32, Traceback) {
|
||||
valueBM := uint32(wfv & 0x0FFF_FFFF)
|
||||
tracebackBM := uint8(wfv & 0x7000_0000 >> 28)
|
||||
validBM := wfv&0x8000_0000 != 0
|
||||
func UnpackWavefrontValue(wfv WavefrontValue) (bool, uint64, Traceback) {
|
||||
validBM := wfv&0x8000_0000_0000_0000 != 0
|
||||
tracebackBM := uint8(wfv & 0x7000_0000_0000_0000 >> 60)
|
||||
valueBM := uint64(wfv & 0x0000_0000_FFFF_FFFF)
|
||||
return validBM, valueBM, Traceback(tracebackBM)
|
||||
}
|
||||
|
||||
// Wavefront: stores a single wavefront, stores wavefront's lo value and hi is naturally lo + len(data)
|
||||
type Wavefront struct { // since wavefronts store diag distance, they should never be negative, and traceback data can be stored as uint8
|
||||
data []WavefrontValue
|
||||
lo int
|
||||
lohi WavefrontLoHi
|
||||
}
|
||||
|
||||
// NewWavefront: returns a new wavefront with size accomodating lo and hi (inclusive)
|
||||
func NewWavefront(lo int, hi int) *Wavefront {
|
||||
a := &Wavefront{}
|
||||
|
||||
a.lo = lo
|
||||
size := a.TranslateIndex(hi)
|
||||
|
||||
newData := make([]WavefrontValue, size+1)
|
||||
a.data = newData
|
||||
a.lohi = PackWavefrontLoHi(lo, hi)
|
||||
size := hi - lo
|
||||
a.data = make([]WavefrontValue, size+1)
|
||||
|
||||
return a
|
||||
}
|
||||
|
||||
// TranslateIndex: utility function for getting the data index given a diagonal
|
||||
func (a *Wavefront) TranslateIndex(diagonal int) int {
|
||||
return diagonal - a.lo
|
||||
lo := int(int32(a.lohi & 0x0000_0000_FFFF_FFFF))
|
||||
return diagonal - lo
|
||||
}
|
||||
|
||||
// Get: returns WavefrontValue for given diagonal
|
||||
@@ -95,27 +106,17 @@ func (a *Wavefront) Set(diagonal int, value WavefrontValue) {
|
||||
|
||||
// WavefrontComponent: each M/I/D wavefront matrix including the wavefront data, lo and hi
|
||||
type WavefrontComponent struct {
|
||||
lo *PositiveSlice[int] // lo for each wavefront
|
||||
hi *PositiveSlice[int] // hi for each wavefront
|
||||
W *PositiveSlice[*Wavefront] // wavefront diag distance and traceback for each wavefront
|
||||
}
|
||||
|
||||
// NewWavefrontComponent: returns initialized WavefrontComponent
|
||||
func NewWavefrontComponent(preallocateSize int) WavefrontComponent {
|
||||
func NewWavefrontComponent() *WavefrontComponent {
|
||||
// new wavefront component = {
|
||||
// lo = [0]
|
||||
// hi = [0]
|
||||
// W = []
|
||||
// }
|
||||
w := WavefrontComponent{
|
||||
lo: &PositiveSlice[int]{
|
||||
data: []int{0},
|
||||
valid: []bool{true},
|
||||
},
|
||||
hi: &PositiveSlice[int]{
|
||||
data: []int{0},
|
||||
valid: []bool{true},
|
||||
},
|
||||
w := &WavefrontComponent{
|
||||
W: &PositiveSlice[*Wavefront]{
|
||||
defaultValue: &Wavefront{
|
||||
data: []WavefrontValue{0},
|
||||
@@ -123,42 +124,27 @@ func NewWavefrontComponent(preallocateSize int) WavefrontComponent {
|
||||
},
|
||||
}
|
||||
|
||||
w.lo.Preallocate(preallocateSize)
|
||||
w.hi.Preallocate(preallocateSize)
|
||||
w.W.Preallocate(preallocateSize)
|
||||
|
||||
return w
|
||||
}
|
||||
|
||||
// GetVal: get value for wavefront=score, diag=k => returns ok, value, traceback
|
||||
func (w *WavefrontComponent) GetVal(score int, k int) (bool, uint32, Traceback) {
|
||||
func (w *WavefrontComponent) GetVal(score int, k int) (bool, uint64, Traceback) {
|
||||
return UnpackWavefrontValue(w.W.Get(score).Get(k))
|
||||
}
|
||||
|
||||
// SetVal: set value, traceback for wavefront=score, diag=k
|
||||
func (w *WavefrontComponent) SetVal(score int, k int, val uint32, tb Traceback) {
|
||||
func (w *WavefrontComponent) SetVal(score int, k int, val uint64, tb Traceback) {
|
||||
w.W.Get(score).Set(k, PackWavefrontValue(val, tb))
|
||||
}
|
||||
|
||||
// GetLoHi: get lo and hi for wavefront=score
|
||||
func (w *WavefrontComponent) GetLoHi(score int) (bool, int, int) {
|
||||
// if lo[score] and hi[score] are valid
|
||||
if w.lo.Valid(score) && w.hi.Valid(score) {
|
||||
// return lo[score] hi[score]
|
||||
return true, w.lo.Get(score), w.hi.Get(score)
|
||||
} else {
|
||||
return false, 0, 0
|
||||
}
|
||||
lo, hi := UnpackWavefrontLoHi(w.W.Get(score).lohi)
|
||||
return w.W.Valid(score), lo, hi
|
||||
}
|
||||
|
||||
// SetLoHi: set lo and hi for wavefront=score
|
||||
func (w *WavefrontComponent) SetLoHi(score int, lo int, hi int) {
|
||||
// lo[score] = lo
|
||||
w.lo.Set(score, lo)
|
||||
// hi[score] = hi
|
||||
w.hi.Set(score, hi)
|
||||
|
||||
// preemptively setup w.W
|
||||
b := NewWavefront(lo, hi)
|
||||
w.W.Set(score, b)
|
||||
}
|
||||
|
62
pkg/utils.go
62
pkg/utils.go
@@ -7,6 +7,7 @@ import (
|
||||
"golang.org/x/exp/constraints"
|
||||
)
|
||||
|
||||
// convert an unsigned into to string
|
||||
func UIntToString(num uint) string { // num assumed to be positive
|
||||
var builder strings.Builder
|
||||
|
||||
@@ -25,6 +26,7 @@ func UIntToString(num uint) string { // num assumed to be positive
|
||||
return string(str)
|
||||
}
|
||||
|
||||
// decode runlength encoded string such as CIGARs
|
||||
func RunLengthDecode(encoded string) string {
|
||||
decoded := strings.Builder{}
|
||||
length := len(encoded)
|
||||
@@ -51,32 +53,17 @@ func RunLengthDecode(encoded string) string {
|
||||
return decoded.String()
|
||||
}
|
||||
|
||||
// given the min index, return the item in values at that index
|
||||
func SafeMin[T constraints.Integer](values []T, idx int) T {
|
||||
return values[idx]
|
||||
}
|
||||
|
||||
// given the max index, return the item in values at that index
|
||||
func SafeMax[T constraints.Integer](values []T, idx int) T {
|
||||
return values[idx]
|
||||
}
|
||||
|
||||
func SafeArgMax[T constraints.Integer](valids []bool, values []T) (bool, int) {
|
||||
hasValid := false
|
||||
maxIndex := 0
|
||||
maxValue := math.MinInt
|
||||
for i := 0; i < len(valids); i++ {
|
||||
if valids[i] && int(values[i]) > maxValue {
|
||||
hasValid = true
|
||||
maxIndex = i
|
||||
maxValue = int(values[i])
|
||||
}
|
||||
}
|
||||
if hasValid {
|
||||
return true, maxIndex
|
||||
} else {
|
||||
return false, 0
|
||||
}
|
||||
}
|
||||
|
||||
// given array of values and corresponding array of valid flags, find the min of value which is valid or return false if there does not exist any
|
||||
func SafeArgMin[T constraints.Integer](valids []bool, values []T) (bool, int) {
|
||||
hasValid := false
|
||||
minIndex := 0
|
||||
@@ -95,7 +82,23 @@ func SafeArgMin[T constraints.Integer](valids []bool, values []T) (bool, int) {
|
||||
}
|
||||
}
|
||||
|
||||
func NextLoHi(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, penalties Penalty) (int, int) {
|
||||
// given array of values and corresponding array of valid flags, find the max of value which is valid or return false if there does not exist any
|
||||
func SafeArgMax[T constraints.Integer](valids []bool, values []T) (bool, int) {
|
||||
hasValid := false
|
||||
maxIndex := 0
|
||||
maxValue := math.MinInt
|
||||
for i := range valids {
|
||||
if valids[i] && int(values[i]) > maxValue {
|
||||
hasValid = true
|
||||
maxIndex = i
|
||||
maxValue = int(values[i])
|
||||
}
|
||||
}
|
||||
return hasValid, maxIndex
|
||||
}
|
||||
|
||||
// set the lext lo and hi bounds for wavefronts M, I, D
|
||||
func NextLoHi(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent, score int, penalties Penalty) (int, int) {
|
||||
x := penalties.X
|
||||
o := penalties.O
|
||||
e := penalties.E
|
||||
@@ -125,35 +128,38 @@ func NextLoHi(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent,
|
||||
return lo, hi
|
||||
}
|
||||
|
||||
func NextI(M WavefrontComponent, I WavefrontComponent, score int, k int, penalties Penalty) {
|
||||
// set the traceback and diag value for the next I wavefront
|
||||
func NextI(M *WavefrontComponent, I *WavefrontComponent, score int, k int, penalties Penalty) {
|
||||
o := penalties.O
|
||||
e := penalties.E
|
||||
|
||||
a_ok, a, _ := M.GetVal(score-o-e, k-1)
|
||||
b_ok, b, _ := I.GetVal(score-e, k-1)
|
||||
|
||||
ok, nextITraceback := SafeArgMax([]bool{a_ok, b_ok}, []uint32{a, b})
|
||||
nextIVal := SafeMax([]uint32{a, b}, nextITraceback) + 1 // important that the +1 is here
|
||||
ok, nextITraceback := SafeArgMax([]bool{a_ok, b_ok}, []uint64{a, b})
|
||||
nextIVal := SafeMax([]uint64{a, b}, nextITraceback) + 1 // important that the +1 is here
|
||||
if ok {
|
||||
I.SetVal(score, k, nextIVal, []Traceback{OpenIns, ExtdIns}[nextITraceback])
|
||||
}
|
||||
}
|
||||
|
||||
func NextD(M WavefrontComponent, D WavefrontComponent, score int, k int, penalties Penalty) {
|
||||
// set the traceback and diag value for the next D wavefront
|
||||
func NextD(M *WavefrontComponent, D *WavefrontComponent, score int, k int, penalties Penalty) {
|
||||
o := penalties.O
|
||||
e := penalties.E
|
||||
|
||||
a_ok, a, _ := M.GetVal(score-o-e, k+1)
|
||||
b_ok, b, _ := D.GetVal(score-e, k+1)
|
||||
|
||||
ok, nextDTraceback := SafeArgMax([]bool{a_ok, b_ok}, []uint32{a, b})
|
||||
nextDVal := SafeMax([]uint32{a, b}, nextDTraceback)
|
||||
ok, nextDTraceback := SafeArgMax([]bool{a_ok, b_ok}, []uint64{a, b})
|
||||
nextDVal := SafeMax([]uint64{a, b}, nextDTraceback)
|
||||
if ok {
|
||||
D.SetVal(score, k, nextDVal, []Traceback{OpenDel, ExtdDel}[nextDTraceback])
|
||||
}
|
||||
}
|
||||
|
||||
func NextM(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, k int, penalties Penalty) {
|
||||
// set the traceback and diag value for the next M wavefront
|
||||
func NextM(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent, score int, k int, penalties Penalty) {
|
||||
x := penalties.X
|
||||
|
||||
a_ok, a, _ := M.GetVal(score-x, k)
|
||||
@@ -161,8 +167,8 @@ func NextM(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, sco
|
||||
b_ok, b, _ := I.GetVal(score, k)
|
||||
c_ok, c, _ := D.GetVal(score, k)
|
||||
|
||||
ok, nextMTraceback := SafeArgMax([]bool{a_ok, b_ok, c_ok}, []uint32{a, b, c})
|
||||
nextMVal := SafeMax([]uint32{a, b, c}, nextMTraceback)
|
||||
ok, nextMTraceback := SafeArgMax([]bool{a_ok, b_ok, c_ok}, []uint64{a, b, c})
|
||||
nextMVal := SafeMax([]uint64{a, b, c}, nextMTraceback)
|
||||
if ok {
|
||||
M.SetVal(score, k, nextMVal, []Traceback{Sub, Ins, Del}[nextMTraceback])
|
||||
}
|
||||
|
43
pkg/wfa.go
43
pkg/wfa.go
@@ -4,23 +4,23 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// WFAlign takes strings s1, s2, penalties, and returns the score and CIGAR if doCIGAR is true
|
||||
func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result {
|
||||
n := len(s1)
|
||||
m := len(s2)
|
||||
A_k := m - n
|
||||
A_offset := uint32(m)
|
||||
A_k := m - n // diagonal where both sequences end
|
||||
A_offset := uint64(m) // offset along a_k diagonal corresponding to end
|
||||
score := 0
|
||||
estimatedScore := (max(n, m) * max(penalties.M, penalties.X, penalties.O, penalties.E)) / 4
|
||||
M := NewWavefrontComponent(estimatedScore)
|
||||
M := NewWavefrontComponent()
|
||||
M.SetLoHi(0, 0, 0)
|
||||
M.SetVal(0, 0, 0, End)
|
||||
I := NewWavefrontComponent(estimatedScore)
|
||||
D := NewWavefrontComponent(estimatedScore)
|
||||
I := NewWavefrontComponent()
|
||||
D := NewWavefrontComponent()
|
||||
|
||||
for {
|
||||
WFExtend(M, s1, n, s2, m, score)
|
||||
ok, val, _ := M.GetVal(score, A_k)
|
||||
if ok && val >= A_offset {
|
||||
if ok && val >= A_offset { // exit when M_(s,a_k) >= A_offset, ie the wavefront has reached the end
|
||||
break
|
||||
}
|
||||
score = score + 1
|
||||
@@ -28,7 +28,7 @@ func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result {
|
||||
}
|
||||
|
||||
CIGAR := ""
|
||||
if doCIGAR {
|
||||
if doCIGAR { // if doCIGAR, then perform backtrace, otherwise just return the score
|
||||
CIGAR = WFBacktrace(M, I, D, score, penalties, A_k, A_offset, s1, s2)
|
||||
}
|
||||
|
||||
@@ -38,40 +38,41 @@ func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result {
|
||||
}
|
||||
}
|
||||
|
||||
func WFExtend(M WavefrontComponent, s1 string, n int, s2 string, m int, score int) {
|
||||
func WFExtend(M *WavefrontComponent, s1 string, n int, s2 string, m int, score int) {
|
||||
_, lo, hi := M.GetLoHi(score)
|
||||
for k := lo; k <= hi; k++ {
|
||||
for k := lo; k <= hi; k++ { // for each diagonal in current wavefront
|
||||
// v = M[score][k] - k
|
||||
// h = M[score][k]
|
||||
ok, hu, _ := M.GetVal(score, k)
|
||||
h := int(hu)
|
||||
v := h - k
|
||||
|
||||
// exit early if v or h are invalid
|
||||
ok, uh, tb := M.GetVal(score, k)
|
||||
// exit early if M_(s,l) is invalid
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
for v < n && h < m && s1[v] == s2[h] {
|
||||
_, val, tb := M.GetVal(score, k)
|
||||
M.SetVal(score, k, val+1, tb)
|
||||
h := int(uh)
|
||||
v := h - k
|
||||
// in the paper, we do v++, h++, M_(s,k)++
|
||||
// however, note that h = M_(s,k) so instead we just do v++, h++ and set M_(s,k) at the end
|
||||
// this saves a some memory reads and writes
|
||||
for v < n && h < m && s1[v] == s2[h] { // extend diagonal for the next set of matches
|
||||
v++
|
||||
h++
|
||||
}
|
||||
M.SetVal(score, k, uint64(h), tb)
|
||||
}
|
||||
}
|
||||
|
||||
func WFNext(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, penalties Penalty) {
|
||||
func WFNext(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent, score int, penalties Penalty) {
|
||||
// get this score's lo, hi
|
||||
lo, hi := NextLoHi(M, I, D, score, penalties)
|
||||
|
||||
for k := lo; k <= hi; k++ {
|
||||
for k := lo; k <= hi; k++ { // for each diagonal, extend the matrices for the next wavefronts
|
||||
NextI(M, I, score, k, penalties)
|
||||
NextD(M, D, score, k, penalties)
|
||||
NextM(M, I, D, score, k, penalties)
|
||||
}
|
||||
}
|
||||
|
||||
func WFBacktrace(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, penalties Penalty, A_k int, A_offset uint32, s1 string, s2 string) string {
|
||||
func WFBacktrace(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent, score int, penalties Penalty, A_k int, A_offset uint64, s1 string, s2 string) string {
|
||||
x := penalties.X
|
||||
o := penalties.O
|
||||
e := penalties.E
|
||||
|
@@ -12,6 +12,7 @@ import (
|
||||
wfa "wfa/pkg"
|
||||
|
||||
"github.com/schollz/progressbar/v3"
|
||||
"golang.org/x/exp/constraints"
|
||||
)
|
||||
|
||||
const testJsonPath = "tests.json"
|
||||
@@ -29,14 +30,14 @@ type TestCase struct {
|
||||
Solutions string `json:"solutions"`
|
||||
}
|
||||
|
||||
func randRange(min, max int) uint32 {
|
||||
return uint32(rand.IntN(max-min) + min)
|
||||
func randRange[T constraints.Integer](min, max int) T {
|
||||
return T(rand.IntN(max-min) + min)
|
||||
}
|
||||
|
||||
func TestWavefrontPacking(t *testing.T) {
|
||||
for range 1000 {
|
||||
val := randRange(0, 1000)
|
||||
tb := wfa.Traceback(randRange(0, 7))
|
||||
val := randRange[uint64](0, 1000)
|
||||
tb := wfa.Traceback(randRange[uint64](0, 7))
|
||||
v := wfa.PackWavefrontValue(val, tb)
|
||||
|
||||
valid, gotVal, gotTB := wfa.UnpackWavefrontValue(v)
|
||||
@@ -47,6 +48,20 @@ func TestWavefrontPacking(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoHiPacking(t *testing.T) {
|
||||
for range 1000 {
|
||||
lo := randRange[int](-1000, 1000)
|
||||
hi := randRange[int](-1000, 1000)
|
||||
v := wfa.PackWavefrontLoHi(lo, hi)
|
||||
|
||||
gotLo, gotHi := wfa.UnpackWavefrontLoHi(v)
|
||||
|
||||
if gotLo != lo || gotHi != hi {
|
||||
t.Errorf(`test WavefrontPack/Unpack, lo: %d, hi: %d, packedval: %x, gotlo: %d, gothi: %d`, lo, hi, v, gotLo, gotHi)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func GetScoreFromCIGAR(CIGAR string, penalties wfa.Penalty) int {
|
||||
unpackedCIGAR := wfa.RunLengthDecode(CIGAR)
|
||||
previousOp := '~'
|
||||
|
Reference in New Issue
Block a user