Compare commits
No commits in common. "main" and "v2.0.1" have entirely different histories.
2
Makefile
2
Makefile
@ -3,7 +3,7 @@
|
|||||||
build: clean
|
build: clean
|
||||||
@echo "======================== Building Binary ======================="
|
@echo "======================== Building Binary ======================="
|
||||||
minify wfa.js > dist/wfa.js
|
minify wfa.js > dist/wfa.js
|
||||||
GOOS=js GOARCH=wasm CGO_ENABLED=0 tinygo build -panic=trap -no-debug -opt=2 -target=wasm -o dist/wfa.wasm .
|
GOOS=js GOARCH=wasm CGO_ENABLED=0 tinygo build -no-debug -opt=2 -target=wasm -o dist/wfa.wasm .
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
@echo "======================== Cleaning Project ======================"
|
@echo "======================== Cleaning Project ======================"
|
||||||
|
@ -5,9 +5,7 @@ Download `wfa.js` and `wfa.wasm` from [releases](https://git.tronnet.net/tronnet/
|
|||||||
```
|
```
|
||||||
import wfa from "./wfa.js"
|
import wfa from "./wfa.js"
|
||||||
await wfa("<path to wasm>")
|
await wfa("<path to wasm>")
|
||||||
let result = wfAlign(...)
|
console.log(wfAlign(...))
|
||||||
operations = DecodeCIGAR(result.CIGAR)
|
|
||||||
// ...
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Where `<path to wasm>` is the path from the site root, i.e., `./scripts/wfa.wasm`. This will depend on your project structure.
|
Where `<path to wasm>` is the path from the site root, i.e., `./scripts/wfa.wasm`. This will depend on your project structure.
|
10
go.mod
10
go.mod
@ -2,14 +2,10 @@ module wfa
|
|||||||
|
|
||||||
go 1.23.2
|
go 1.23.2
|
||||||
|
|
||||||
require (
|
|
||||||
github.com/schollz/progressbar/v3 v3.17.1
|
|
||||||
golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f
|
|
||||||
)
|
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
|
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
|
||||||
github.com/rivo/uniseg v0.4.7 // indirect
|
github.com/rivo/uniseg v0.4.7 // indirect
|
||||||
golang.org/x/sys v0.27.0 // indirect
|
github.com/schollz/progressbar/v3 v3.16.1 // indirect
|
||||||
golang.org/x/term v0.26.0 // indirect
|
golang.org/x/sys v0.26.0 // indirect
|
||||||
|
golang.org/x/term v0.25.0 // indirect
|
||||||
)
|
)
|
||||||
|
64
main.go
64
main.go
@ -1,6 +1,7 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"syscall/js"
|
"syscall/js"
|
||||||
wfa "wfa/pkg"
|
wfa "wfa/pkg"
|
||||||
)
|
)
|
||||||
@ -8,53 +9,37 @@ import (
|
|||||||
func main() {
|
func main() {
|
||||||
c := make(chan bool)
|
c := make(chan bool)
|
||||||
js.Global().Set("wfAlign", js.FuncOf(wfAlign))
|
js.Global().Set("wfAlign", js.FuncOf(wfAlign))
|
||||||
js.Global().Set("DecodeCIGAR", js.FuncOf(DecodeCIGAR))
|
|
||||||
<-c
|
<-c
|
||||||
}
|
}
|
||||||
|
|
||||||
func wfAlign(this js.Value, args []js.Value) interface{} {
|
func wfAlign(this js.Value, args []js.Value) interface{} {
|
||||||
if len(args) != 4 {
|
if len(args) != 4 {
|
||||||
resultMap := map[string]interface{}{
|
fmt.Println("invalid number of args, requires 4: s1, s2, penalties, doCIGAR")
|
||||||
"ok": false,
|
return nil
|
||||||
"error": "invalid number of args, requires 4: s1, s2, penalties, doCIGAR",
|
|
||||||
}
|
|
||||||
return js.ValueOf(resultMap)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if args[0].Type() != js.TypeString {
|
if args[0].Type() != js.TypeString {
|
||||||
resultMap := map[string]interface{}{
|
fmt.Println("s1 should be a string")
|
||||||
"ok": false,
|
return nil
|
||||||
"error": "s1 should be a string",
|
|
||||||
}
|
|
||||||
return js.ValueOf(resultMap)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
s1 := args[0].String()
|
s1 := args[0].String()
|
||||||
|
|
||||||
if args[1].Type() != js.TypeString {
|
if args[1].Type() != js.TypeString {
|
||||||
resultMap := map[string]interface{}{
|
fmt.Println("s2 should be a string")
|
||||||
"ok": false,
|
return nil
|
||||||
"error": "s2 should be a string",
|
|
||||||
}
|
|
||||||
return js.ValueOf(resultMap)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
s2 := args[1].String()
|
s2 := args[1].String()
|
||||||
|
|
||||||
if args[2].Type() != js.TypeObject {
|
if args[2].Type() != js.TypeObject {
|
||||||
resultMap := map[string]interface{}{
|
fmt.Println("penalties should be a map with key values m, x, o, e")
|
||||||
"ok": false,
|
return nil
|
||||||
"error": "penalties should be a map with key values m, x, o, e",
|
|
||||||
}
|
|
||||||
return js.ValueOf(resultMap)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if args[2].Get("m").IsUndefined() || args[2].Get("x").IsUndefined() || args[2].Get("o").IsUndefined() || args[2].Get("e").IsUndefined() {
|
if args[2].Get("m").IsUndefined() || args[2].Get("x").IsUndefined() || args[2].Get("o").IsUndefined() || args[2].Get("e").IsUndefined() {
|
||||||
resultMap := map[string]interface{}{
|
fmt.Println("penalties should be a map with key values m, x, o, e")
|
||||||
"ok": false,
|
return nil
|
||||||
"error": "penalties should be a map with key values m, x, o, e",
|
|
||||||
}
|
|
||||||
return js.ValueOf(resultMap)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
m := args[2].Get("m").Int()
|
m := args[2].Get("m").Int()
|
||||||
@ -70,11 +55,8 @@ func wfAlign(this js.Value, args []js.Value) interface{} {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if args[3].Type() != js.TypeBoolean {
|
if args[3].Type() != js.TypeBoolean {
|
||||||
resultMap := map[string]interface{}{
|
fmt.Println("doCIGAR should be a boolean")
|
||||||
"ok": false,
|
return nil
|
||||||
"error": "doCIGAR should be a boolean",
|
|
||||||
}
|
|
||||||
return js.ValueOf(resultMap)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
doCIGAR := args[3].Bool()
|
doCIGAR := args[3].Bool()
|
||||||
@ -82,29 +64,9 @@ func wfAlign(this js.Value, args []js.Value) interface{} {
|
|||||||
// Call the actual func.
|
// Call the actual func.
|
||||||
result := wfa.WFAlign(s1, s2, penalties, doCIGAR)
|
result := wfa.WFAlign(s1, s2, penalties, doCIGAR)
|
||||||
resultMap := map[string]interface{}{
|
resultMap := map[string]interface{}{
|
||||||
"ok": true,
|
|
||||||
"score": result.Score,
|
"score": result.Score,
|
||||||
"CIGAR": result.CIGAR,
|
"CIGAR": result.CIGAR,
|
||||||
"error": "",
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return js.ValueOf(resultMap)
|
return js.ValueOf(resultMap)
|
||||||
}
|
}
|
||||||
|
|
||||||
func DecodeCIGAR(this js.Value, args []js.Value) interface{} {
|
|
||||||
if len(args) != 1 {
|
|
||||||
println("invalid number of args, requires 1: CIGAR")
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
if args[0].Type() != js.TypeString {
|
|
||||||
println("CIGAR should be a string")
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
CIGAR := args[0].String()
|
|
||||||
|
|
||||||
decoded := wfa.RunLengthDecode(CIGAR)
|
|
||||||
|
|
||||||
return js.ValueOf(decoded)
|
|
||||||
}
|
|
||||||
|
@ -1,5 +1,65 @@
|
|||||||
package wfa
|
package wfa
|
||||||
|
|
||||||
|
type IntegerSlice[T any] struct {
|
||||||
|
data []T
|
||||||
|
valid []bool
|
||||||
|
defaultValue T
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *IntegerSlice[T]) TranslateIndex(idx int) int {
|
||||||
|
if idx >= 0 { // 0 -> 0, 1 -> 2, 2 -> 4, 3 -> 6, ...
|
||||||
|
return 2 * idx
|
||||||
|
} else { // -1 -> 1, -2 -> 3, -3 -> 5, ...
|
||||||
|
return (-2 * idx) - 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *IntegerSlice[T]) Valid(idx int) bool {
|
||||||
|
actualIdx := a.TranslateIndex(idx)
|
||||||
|
return 0 <= actualIdx && actualIdx < len(a.valid) && a.valid[actualIdx]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *IntegerSlice[T]) Get(idx int) T {
|
||||||
|
actualIdx := a.TranslateIndex(idx)
|
||||||
|
if 0 <= actualIdx && actualIdx < len(a.valid) && a.valid[actualIdx] { // idx is in the slice
|
||||||
|
return a.data[actualIdx]
|
||||||
|
} else { // idx is out of the slice
|
||||||
|
return a.defaultValue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *IntegerSlice[T]) Set(idx int, value T) {
|
||||||
|
actualIdx := a.TranslateIndex(idx)
|
||||||
|
if actualIdx >= len(a.valid) { // idx is outside the slice
|
||||||
|
// expand data array to actualIdx
|
||||||
|
newData := make([]T, 2*actualIdx+1)
|
||||||
|
copy(newData, a.data)
|
||||||
|
a.data = newData
|
||||||
|
|
||||||
|
// expand valid array to actualIdx
|
||||||
|
newValid := make([]bool, 2*actualIdx+1)
|
||||||
|
copy(newValid, a.valid)
|
||||||
|
a.valid = newValid
|
||||||
|
}
|
||||||
|
|
||||||
|
a.data[actualIdx] = value
|
||||||
|
a.valid[actualIdx] = true
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *IntegerSlice[T]) Preallocate(lo int, hi int) {
|
||||||
|
actualLo := a.TranslateIndex(lo)
|
||||||
|
actualHi := a.TranslateIndex(hi)
|
||||||
|
size := max(actualHi, actualLo)
|
||||||
|
|
||||||
|
// expand data array to actualIdx
|
||||||
|
newData := make([]T, size+1)
|
||||||
|
a.data = newData
|
||||||
|
|
||||||
|
// expand valid array to actualIdx
|
||||||
|
newValid := make([]bool, size+1)
|
||||||
|
a.valid = newValid
|
||||||
|
}
|
||||||
|
|
||||||
type PositiveSlice[T any] struct {
|
type PositiveSlice[T any] struct {
|
||||||
data []T
|
data []T
|
||||||
valid []bool
|
valid []bool
|
||||||
@ -41,3 +101,15 @@ func (a *PositiveSlice[T]) Set(idx int, value T) {
|
|||||||
a.data[actualIdx] = value
|
a.data[actualIdx] = value
|
||||||
a.valid[actualIdx] = true
|
a.valid[actualIdx] = true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (a *PositiveSlice[T]) Preallocate(hi int) {
|
||||||
|
size := hi
|
||||||
|
|
||||||
|
// expand data array to actualIdx
|
||||||
|
newData := make([]T, size+1)
|
||||||
|
a.data = newData
|
||||||
|
|
||||||
|
// expand valid array to actualIdx
|
||||||
|
newValid := make([]bool, size+1)
|
||||||
|
a.valid = newValid
|
||||||
|
}
|
||||||
|
81
pkg/debug.go
81
pkg/debug.go
@ -1,81 +0,0 @@
|
|||||||
//go:build debug
|
|
||||||
|
|
||||||
package wfa
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"math"
|
|
||||||
)
|
|
||||||
|
|
||||||
func (w *WavefrontComponent) String(score int) string {
|
|
||||||
traceback_str := []string{"OI", "EI", "OD", "ED", "SB", "IN", "DL", "EN"}
|
|
||||||
s := "<"
|
|
||||||
min_lo := math.MaxInt
|
|
||||||
max_hi := math.MinInt
|
|
||||||
|
|
||||||
for i := 0; i <= score; i++ {
|
|
||||||
valid := w.W.Valid(i)
|
|
||||||
lo, hi := UnpackWavefrontLoHi(w.W.Get(i).lohi)
|
|
||||||
if valid && lo < min_lo {
|
|
||||||
min_lo = lo
|
|
||||||
}
|
|
||||||
if valid && hi > max_hi {
|
|
||||||
max_hi = hi
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for k := min_lo; k <= max_hi; k++ {
|
|
||||||
s = s + fmt.Sprintf("%02d", k)
|
|
||||||
if k < max_hi {
|
|
||||||
s = s + "|"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
s = s + ">\t<"
|
|
||||||
|
|
||||||
for k := min_lo; k <= max_hi; k++ {
|
|
||||||
s = s + fmt.Sprintf("%02d", k)
|
|
||||||
if k < max_hi {
|
|
||||||
s = s + "|"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
s = s + ">\n"
|
|
||||||
|
|
||||||
for i := 0; i <= score; i++ {
|
|
||||||
s = s + "["
|
|
||||||
lo, hi := UnpackWavefrontLoHi(w.W.Get(i).lohi)
|
|
||||||
for k := min_lo; k <= max_hi; k++ {
|
|
||||||
valid, val, _ := UnpackWavefrontValue(w.W.Get(i).Get(k))
|
|
||||||
if valid {
|
|
||||||
s = s + fmt.Sprintf("%02d", val)
|
|
||||||
} else if k < lo || k > hi {
|
|
||||||
s = s + "--"
|
|
||||||
} else {
|
|
||||||
s = s + " "
|
|
||||||
}
|
|
||||||
|
|
||||||
if k < max_hi {
|
|
||||||
s = s + "|"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
s = s + "]\t["
|
|
||||||
// print out traceback matrix
|
|
||||||
for k := min_lo; k <= max_hi; k++ {
|
|
||||||
valid, _, tb := UnpackWavefrontValue(w.W.Get(i).Get(k))
|
|
||||||
if valid {
|
|
||||||
s = s + traceback_str[tb]
|
|
||||||
} else if k < lo || k > hi {
|
|
||||||
s = s + "--"
|
|
||||||
} else {
|
|
||||||
s = s + " "
|
|
||||||
}
|
|
||||||
|
|
||||||
if k < max_hi {
|
|
||||||
s = s + "|"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
s = s + "]\n"
|
|
||||||
}
|
|
||||||
return s
|
|
||||||
}
|
|
252
pkg/types.go
252
pkg/types.go
@ -1,5 +1,10 @@
|
|||||||
package wfa
|
package wfa
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
)
|
||||||
|
|
||||||
type Result struct {
|
type Result struct {
|
||||||
Score int
|
Score int
|
||||||
CIGAR string
|
CIGAR string
|
||||||
@ -12,10 +17,10 @@ type Penalty struct {
|
|||||||
E int
|
E int
|
||||||
}
|
}
|
||||||
|
|
||||||
type Traceback byte
|
type traceback byte
|
||||||
|
|
||||||
const (
|
const (
|
||||||
OpenIns Traceback = iota
|
OpenIns traceback = iota
|
||||||
ExtdIns
|
ExtdIns
|
||||||
OpenDel
|
OpenDel
|
||||||
ExtdDel
|
ExtdDel
|
||||||
@ -25,132 +30,165 @@ const (
|
|||||||
End
|
End
|
||||||
)
|
)
|
||||||
|
|
||||||
// bitpacked wavefront lo/hi values with 32 bits each
|
|
||||||
type WavefrontLoHi uint64
|
|
||||||
|
|
||||||
func PackWavefrontLoHi(lo int, hi int) WavefrontLoHi {
|
|
||||||
loBM := int64(int32(lo)) & 0x0000_0000_FFFF_FFFF
|
|
||||||
hiBM := int64(int64(hi) << 32)
|
|
||||||
return WavefrontLoHi(hiBM | loBM)
|
|
||||||
}
|
|
||||||
|
|
||||||
func UnpackWavefrontLoHi(lohi WavefrontLoHi) (int, int) {
|
|
||||||
loBM := int(int32(lohi & 0x0000_0000_FFFF_FFFF))
|
|
||||||
hiBM := int(int32(lohi & 0xFFFF_FFFF_0000_0000 >> 32))
|
|
||||||
return loBM, hiBM
|
|
||||||
}
|
|
||||||
|
|
||||||
// bitpacked wavefront values with 1 valid bit, 3 traceback bits, and 28 bits for the diag distance
|
|
||||||
// technically this restricts to alignments with less than 268 million characters but that should be sufficient for most cases
|
|
||||||
type WavefrontValue uint32
|
|
||||||
|
|
||||||
// TODO: add 64 bit packed value in case more than 268 million characters are needed
|
|
||||||
|
|
||||||
// PackWavefrontValue: packs a diag value and traceback into a WavefrontValue
|
|
||||||
func PackWavefrontValue(value uint32, traceback Traceback) WavefrontValue {
|
|
||||||
validBM := uint32(0x8000_0000)
|
|
||||||
tracebackBM := uint32(traceback&0x0000_0007) << 28
|
|
||||||
valueBM := value & 0x0FFF_FFFF
|
|
||||||
return WavefrontValue(validBM | tracebackBM | valueBM)
|
|
||||||
}
|
|
||||||
|
|
||||||
// UnpackWavefrontValue: opens a WavefrontValue into a valid bool, diag value and traceback
|
|
||||||
func UnpackWavefrontValue(wfv WavefrontValue) (bool, uint32, Traceback) {
|
|
||||||
validBM := wfv&0x8000_0000 != 0
|
|
||||||
tracebackBM := uint8(wfv & 0x7000_0000 >> 28)
|
|
||||||
valueBM := uint32(wfv & 0x0FFF_FFFF)
|
|
||||||
return validBM, valueBM, Traceback(tracebackBM)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wavefront: stores a single wavefront's values together with its packed lo/hi bounds; data holds hi - lo + 1 entries
|
|
||||||
type Wavefront struct { // since wavefronts store diag distance, they should never be negative, and traceback data can be stored as uint8
|
|
||||||
data []WavefrontValue
|
|
||||||
lohi WavefrontLoHi
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewWavefront: returns a new wavefront with size accommodating lo and hi (inclusive)
|
|
||||||
func NewWavefront(lo int, hi int) *Wavefront {
|
|
||||||
a := &Wavefront{}
|
|
||||||
|
|
||||||
a.lohi = PackWavefrontLoHi(lo, hi)
|
|
||||||
size := hi - lo
|
|
||||||
|
|
||||||
newData := make([]WavefrontValue, size+1)
|
|
||||||
a.data = newData
|
|
||||||
|
|
||||||
return a
|
|
||||||
}
|
|
||||||
|
|
||||||
// TranslateIndex: utility function for getting the data index given a diagonal
|
|
||||||
func (a *Wavefront) TranslateIndex(diagonal int) int {
|
|
||||||
lo := int(int32(a.lohi & 0x0000_0000_FFFF_FFFF))
|
|
||||||
return diagonal - lo
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get: returns WavefrontValue for given diagonal
|
|
||||||
func (a *Wavefront) Get(diagonal int) WavefrontValue {
|
|
||||||
actualIdx := a.TranslateIndex(diagonal)
|
|
||||||
if 0 <= actualIdx && actualIdx < len(a.data) { // idx is in the slice
|
|
||||||
return a.data[actualIdx]
|
|
||||||
} else { // idx is out of the slice
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set: the diagonal to a WavefrontValue
|
|
||||||
func (a *Wavefront) Set(diagonal int, value WavefrontValue) {
|
|
||||||
actualIdx := a.TranslateIndex(diagonal)
|
|
||||||
|
|
||||||
/* in theory idx is always in bounds because the wavefront is preallocated
|
|
||||||
if actualIdx < 0 || actualIdx >= len(a.data) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
a.data[actualIdx] = value
|
|
||||||
}
|
|
||||||
|
|
||||||
// WavefrontComponent: each M/I/D wavefront matrix including the wavefront data, lo and hi
|
|
||||||
type WavefrontComponent struct {
|
type WavefrontComponent struct {
|
||||||
W *PositiveSlice[*Wavefront] // wavefront diag distance and traceback for each wavefront
|
lo *PositiveSlice[int] // lo for each wavefront
|
||||||
|
hi *PositiveSlice[int] // hi for each wavefront
|
||||||
|
W *PositiveSlice[*IntegerSlice[int]] // wavefront diag distance for each wavefront
|
||||||
|
A *PositiveSlice[*IntegerSlice[traceback]] // compact CIGAR for backtrace for each wavefront
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewWavefrontComponent: returns initialized WavefrontComponent
|
func NewWavefrontComponent(preallocateSize int) WavefrontComponent {
|
||||||
func NewWavefrontComponent() *WavefrontComponent {
|
|
||||||
// new wavefront component = {
|
// new wavefront component = {
|
||||||
// lo = [0]
|
// lo = [0]
|
||||||
// hi = [0]
|
// hi = [0]
|
||||||
// W = []
|
// W = []
|
||||||
|
// A = []
|
||||||
// }
|
// }
|
||||||
w := &WavefrontComponent{
|
w := WavefrontComponent{
|
||||||
W: &PositiveSlice[*Wavefront]{
|
lo: &PositiveSlice[int]{
|
||||||
defaultValue: &Wavefront{
|
data: []int{0},
|
||||||
data: []WavefrontValue{0},
|
valid: []bool{true},
|
||||||
|
},
|
||||||
|
hi: &PositiveSlice[int]{
|
||||||
|
data: []int{0},
|
||||||
|
valid: []bool{true},
|
||||||
|
},
|
||||||
|
W: &PositiveSlice[*IntegerSlice[int]]{
|
||||||
|
defaultValue: &IntegerSlice[int]{
|
||||||
|
data: []int{},
|
||||||
|
valid: []bool{},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
A: &PositiveSlice[*IntegerSlice[traceback]]{
|
||||||
|
defaultValue: &IntegerSlice[traceback]{
|
||||||
|
data: []traceback{},
|
||||||
|
valid: []bool{},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
w.lo.Preallocate(preallocateSize)
|
||||||
|
w.hi.Preallocate(preallocateSize)
|
||||||
|
w.W.Preallocate(preallocateSize)
|
||||||
|
w.A.Preallocate(preallocateSize)
|
||||||
|
|
||||||
return w
|
return w
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetVal: get value for wavefront=score, diag=k => returns ok, value, traceback
|
// get value for wavefront=score, diag=k => returns ok, value
|
||||||
func (w *WavefrontComponent) GetVal(score int, k int) (bool, uint32, Traceback) {
|
func (w *WavefrontComponent) GetVal(score int, k int) (bool, int) {
|
||||||
return UnpackWavefrontValue(w.W.Get(score).Get(k))
|
return w.W.Valid(score) && w.W.Get(score).Valid(k), w.W.Get(score).Get(k)
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetVal: set value, traceback for wavefront=score, diag=k
|
// set value for wavefront=score, diag=k
|
||||||
func (w *WavefrontComponent) SetVal(score int, k int, val uint32, tb Traceback) {
|
func (w *WavefrontComponent) SetVal(score int, k int, val int) {
|
||||||
w.W.Get(score).Set(k, PackWavefrontValue(val, tb))
|
w.W.Get(score).Set(k, val)
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetLoHi: get lo and hi for wavefront=score
|
// get alignment traceback for wavefront=score, diag=k => returns ok, value
|
||||||
|
func (w *WavefrontComponent) GetTraceback(score int, k int) (bool, traceback) {
|
||||||
|
return w.A.Valid(score) && w.A.Get(score).Valid(k), w.A.Get(score).Get(k)
|
||||||
|
}
|
||||||
|
|
||||||
|
// set alignment traceback for wavefront=score, diag=k
|
||||||
|
func (w *WavefrontComponent) SetTraceback(score int, k int, val traceback) {
|
||||||
|
w.A.Get(score).Set(k, val)
|
||||||
|
}
|
||||||
|
|
||||||
|
// get lo and hi for wavefront=score => returns ok, lo, hi
|
||||||
func (w *WavefrontComponent) GetLoHi(score int) (bool, int, int) {
|
func (w *WavefrontComponent) GetLoHi(score int) (bool, int, int) {
|
||||||
lo, hi := UnpackWavefrontLoHi(w.W.Get(score).lohi)
|
// if lo[score] and hi[score] are valid
|
||||||
return w.W.Valid(score), lo, hi
|
if w.lo.Valid(score) && w.hi.Valid(score) {
|
||||||
|
// return lo[score] hi[score]
|
||||||
|
return true, w.lo.Get(score), w.hi.Get(score)
|
||||||
|
} else {
|
||||||
|
return false, 0, 0
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetLoHi: set lo and hi for wavefront=score
|
// set lo and hi for wavefront=score
|
||||||
func (w *WavefrontComponent) SetLoHi(score int, lo int, hi int) {
|
func (w *WavefrontComponent) SetLoHi(score int, lo int, hi int) {
|
||||||
b := NewWavefront(lo, hi)
|
// lo[score] = lo
|
||||||
w.W.Set(score, b)
|
w.lo.Set(score, lo)
|
||||||
|
// hi[score] = hi
|
||||||
|
w.hi.Set(score, hi)
|
||||||
|
|
||||||
|
// preemptively setup w.A
|
||||||
|
w.A.Set(score, &IntegerSlice[traceback]{})
|
||||||
|
w.A.Get(score).Preallocate(lo, hi)
|
||||||
|
|
||||||
|
// preemptively setup w.W
|
||||||
|
w.W.Set(score, &IntegerSlice[int]{})
|
||||||
|
w.W.Get(score).Preallocate(lo, hi)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (w *WavefrontComponent) String(score int) string {
|
||||||
|
traceback_str := []string{"OI", "EI", "OD", "ED", "SB", "IN", "DL", "EN"}
|
||||||
|
s := "<"
|
||||||
|
min_lo := math.MaxInt
|
||||||
|
max_hi := math.MinInt
|
||||||
|
|
||||||
|
for i := 0; i <= score; i++ {
|
||||||
|
if w.lo.Valid(i) && w.lo.Get(i) < min_lo {
|
||||||
|
min_lo = w.lo.Get(i)
|
||||||
|
}
|
||||||
|
if w.hi.Valid(i) && w.hi.Get(i) > max_hi {
|
||||||
|
max_hi = w.hi.Get(i)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for k := min_lo; k <= max_hi; k++ {
|
||||||
|
s = s + fmt.Sprintf("%02d", k)
|
||||||
|
if k < max_hi {
|
||||||
|
s = s + "|"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
s = s + ">\t<"
|
||||||
|
|
||||||
|
for k := min_lo; k <= max_hi; k++ {
|
||||||
|
s = s + fmt.Sprintf("%02d", k)
|
||||||
|
if k < max_hi {
|
||||||
|
s = s + "|"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
s = s + ">\n"
|
||||||
|
|
||||||
|
for i := 0; i <= score; i++ {
|
||||||
|
s = s + "["
|
||||||
|
lo := w.lo.Get(i)
|
||||||
|
hi := w.hi.Get(i)
|
||||||
|
// print out wavefront matrix
|
||||||
|
for k := min_lo; k <= max_hi; k++ {
|
||||||
|
if w.W.Valid(i) && w.W.Get(i).Valid(k) {
|
||||||
|
s = s + fmt.Sprintf("%02d", w.W.Get(i).Get(k))
|
||||||
|
} else if k < lo || k > hi {
|
||||||
|
s = s + "--"
|
||||||
|
} else {
|
||||||
|
s = s + " "
|
||||||
|
}
|
||||||
|
|
||||||
|
if k < max_hi {
|
||||||
|
s = s + "|"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
s = s + "]\t["
|
||||||
|
// print out traceback matrix
|
||||||
|
for k := min_lo; k <= max_hi; k++ {
|
||||||
|
if w.A.Valid(i) && w.A.Get(i).Valid(k) {
|
||||||
|
s = s + traceback_str[w.A.Get(i).Get(k)]
|
||||||
|
} else if k < lo || k > hi {
|
||||||
|
s = s + "--"
|
||||||
|
} else {
|
||||||
|
s = s + " "
|
||||||
|
}
|
||||||
|
|
||||||
|
if k < max_hi {
|
||||||
|
s = s + "|"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
s = s + "]\n"
|
||||||
|
}
|
||||||
|
return s
|
||||||
}
|
}
|
||||||
|
126
pkg/utils.go
126
pkg/utils.go
@ -2,72 +2,26 @@ package wfa
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"math"
|
"math"
|
||||||
"strings"
|
"unicode/utf8"
|
||||||
|
|
||||||
"golang.org/x/exp/constraints"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func UIntToString(num uint) string { // num assumed to be positive
|
func SafeMin(values []int, idx int) int {
|
||||||
var builder strings.Builder
|
|
||||||
|
|
||||||
for num > 0 {
|
|
||||||
digit := num % 10
|
|
||||||
builder.WriteRune(rune('0' + digit))
|
|
||||||
num /= 10
|
|
||||||
}
|
|
||||||
|
|
||||||
// Reverse the string as we built it in reverse order
|
|
||||||
str := []rune(builder.String())
|
|
||||||
for i, j := 0, len(str)-1; i < j; i, j = i+1, j-1 {
|
|
||||||
str[i], str[j] = str[j], str[i]
|
|
||||||
}
|
|
||||||
|
|
||||||
return string(str)
|
|
||||||
}
|
|
||||||
|
|
||||||
func RunLengthDecode(encoded string) string {
|
|
||||||
decoded := strings.Builder{}
|
|
||||||
length := len(encoded)
|
|
||||||
i := 0
|
|
||||||
|
|
||||||
for i < length {
|
|
||||||
// If the current character is a digit, we need to extract the run length
|
|
||||||
runLength := 0
|
|
||||||
for i < length && encoded[i] >= '0' && encoded[i] <= '9' {
|
|
||||||
runLength = runLength*10 + int(encoded[i]-'0')
|
|
||||||
i++
|
|
||||||
}
|
|
||||||
|
|
||||||
// The next character will be the character to repeat
|
|
||||||
if i < length {
|
|
||||||
char := encoded[i]
|
|
||||||
for j := 0; j < runLength; j++ {
|
|
||||||
decoded.WriteByte(char)
|
|
||||||
}
|
|
||||||
i++ // Move past the character
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return decoded.String()
|
|
||||||
}
|
|
||||||
|
|
||||||
func SafeMin[T constraints.Integer](values []T, idx int) T {
|
|
||||||
return values[idx]
|
return values[idx]
|
||||||
}
|
}
|
||||||
|
|
||||||
func SafeMax[T constraints.Integer](values []T, idx int) T {
|
func SafeMax(values []int, idx int) int {
|
||||||
return values[idx]
|
return values[idx]
|
||||||
}
|
}
|
||||||
|
|
||||||
func SafeArgMax[T constraints.Integer](valids []bool, values []T) (bool, int) {
|
func SafeArgMax(valids []bool, values []int) (bool, int) {
|
||||||
hasValid := false
|
hasValid := false
|
||||||
maxIndex := 0
|
maxIndex := 0
|
||||||
maxValue := math.MinInt
|
maxValue := math.MinInt
|
||||||
for i := 0; i < len(valids); i++ {
|
for i := 0; i < len(valids); i++ {
|
||||||
if valids[i] && int(values[i]) > maxValue {
|
if valids[i] && values[i] > maxValue {
|
||||||
hasValid = true
|
hasValid = true
|
||||||
maxIndex = i
|
maxIndex = i
|
||||||
maxValue = int(values[i])
|
maxValue = values[i]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if hasValid {
|
if hasValid {
|
||||||
@ -77,15 +31,15 @@ func SafeArgMax[T constraints.Integer](valids []bool, values []T) (bool, int) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func SafeArgMin[T constraints.Integer](valids []bool, values []T) (bool, int) {
|
func SafeArgMin(valids []bool, values []int) (bool, int) {
|
||||||
hasValid := false
|
hasValid := false
|
||||||
minIndex := 0
|
minIndex := 0
|
||||||
minValue := math.MaxInt
|
minValue := math.MaxInt
|
||||||
for i := 0; i < len(valids); i++ {
|
for i := 0; i < len(valids); i++ {
|
||||||
if valids[i] && int(values[i]) < minValue {
|
if valids[i] && values[i] < minValue {
|
||||||
hasValid = true
|
hasValid = true
|
||||||
minIndex = i
|
minIndex = i
|
||||||
minValue = int(values[i])
|
minValue = values[i]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if hasValid {
|
if hasValid {
|
||||||
@ -95,7 +49,22 @@ func SafeArgMin[T constraints.Integer](valids []bool, values []T) (bool, int) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func NextLoHi(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent, score int, penalties Penalty) (int, int) {
|
func Reverse(s string) string {
|
||||||
|
size := len(s)
|
||||||
|
buf := make([]byte, size)
|
||||||
|
for start := 0; start < size; {
|
||||||
|
r, n := utf8.DecodeRuneInString(s[start:])
|
||||||
|
start += n
|
||||||
|
utf8.EncodeRune(buf[size-start:], r)
|
||||||
|
}
|
||||||
|
return string(buf)
|
||||||
|
}
|
||||||
|
|
||||||
|
func Splice(s string, c rune, idx int) string {
|
||||||
|
return s[:idx] + string(c) + s[idx:]
|
||||||
|
}
|
||||||
|
|
||||||
|
func NextLoHi(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, penalties Penalty) (int, int) {
|
||||||
x := penalties.X
|
x := penalties.X
|
||||||
o := penalties.O
|
o := penalties.O
|
||||||
e := penalties.E
|
e := penalties.E
|
||||||
@ -125,45 +94,52 @@ func NextLoHi(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponen
|
|||||||
return lo, hi
|
return lo, hi
|
||||||
}
|
}
|
||||||
|
|
||||||
func NextI(M *WavefrontComponent, I *WavefrontComponent, score int, k int, penalties Penalty) {
|
func NextI(M WavefrontComponent, I WavefrontComponent, score int, k int, penalties Penalty) {
|
||||||
o := penalties.O
|
o := penalties.O
|
||||||
e := penalties.E
|
e := penalties.E
|
||||||
|
|
||||||
a_ok, a, _ := M.GetVal(score-o-e, k-1)
|
a_ok, a := M.GetVal(score-o-e, k-1)
|
||||||
b_ok, b, _ := I.GetVal(score-e, k-1)
|
b_ok, b := I.GetVal(score-e, k-1)
|
||||||
|
|
||||||
ok, nextITraceback := SafeArgMax([]bool{a_ok, b_ok}, []uint32{a, b})
|
ok, nextITraceback := SafeArgMax([]bool{a_ok, b_ok}, []int{a, b})
|
||||||
nextIVal := SafeMax([]uint32{a, b}, nextITraceback) + 1 // important that the +1 is here
|
nextIVal := SafeMax([]int{a, b}, nextITraceback) + 1 // important that the +1 is here
|
||||||
if ok {
|
if ok {
|
||||||
I.SetVal(score, k, nextIVal, []Traceback{OpenIns, ExtdIns}[nextITraceback])
|
I.SetVal(score, k, nextIVal)
|
||||||
|
I.SetTraceback(score, k, []traceback{OpenIns, ExtdIns}[nextITraceback])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func NextD(M *WavefrontComponent, D *WavefrontComponent, score int, k int, penalties Penalty) {
|
func NextD(M WavefrontComponent, D WavefrontComponent, score int, k int, penalties Penalty) {
|
||||||
o := penalties.O
|
o := penalties.O
|
||||||
e := penalties.E
|
e := penalties.E
|
||||||
|
|
||||||
a_ok, a, _ := M.GetVal(score-o-e, k+1)
|
a_ok, a := M.GetVal(score-o-e, k+1)
|
||||||
b_ok, b, _ := D.GetVal(score-e, k+1)
|
b_ok, b := D.GetVal(score-e, k+1)
|
||||||
|
|
||||||
ok, nextDTraceback := SafeArgMax([]bool{a_ok, b_ok}, []uint32{a, b})
|
ok, nextDTraceback := SafeArgMax(
|
||||||
nextDVal := SafeMax([]uint32{a, b}, nextDTraceback)
|
[]bool{a_ok, b_ok},
|
||||||
|
[]int{a, b},
|
||||||
|
)
|
||||||
|
nextDVal := SafeMax([]int{a, b}, nextDTraceback) // nothing special
|
||||||
if ok {
|
if ok {
|
||||||
D.SetVal(score, k, nextDVal, []Traceback{OpenDel, ExtdDel}[nextDTraceback])
|
D.SetVal(score, k, nextDVal)
|
||||||
|
D.SetTraceback(score, k, []traceback{OpenDel, ExtdDel}[nextDTraceback])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func NextM(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent, score int, k int, penalties Penalty) {
|
func NextM(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, k int, penalties Penalty) {
|
||||||
x := penalties.X
|
x := penalties.X
|
||||||
|
|
||||||
a_ok, a, _ := M.GetVal(score-x, k)
|
a_ok, a := M.GetVal(score-x, k)
|
||||||
a++ // important to have +1 here
|
a++ // important to have +1 here
|
||||||
b_ok, b, _ := I.GetVal(score, k)
|
b_ok, b := I.GetVal(score, k)
|
||||||
c_ok, c, _ := D.GetVal(score, k)
|
c_ok, c := D.GetVal(score, k)
|
||||||
|
|
||||||
|
ok, nextMTraceback := SafeArgMax([]bool{a_ok, b_ok, c_ok}, []int{a, b, c})
|
||||||
|
nextMVal := SafeMax([]int{a, b, c}, nextMTraceback)
|
||||||
|
|
||||||
ok, nextMTraceback := SafeArgMax([]bool{a_ok, b_ok, c_ok}, []uint32{a, b, c})
|
|
||||||
nextMVal := SafeMax([]uint32{a, b, c}, nextMTraceback)
|
|
||||||
if ok {
|
if ok {
|
||||||
M.SetVal(score, k, nextMVal, []Traceback{Sub, Ins, Del}[nextMTraceback])
|
M.SetVal(score, k, nextMVal)
|
||||||
|
M.SetTraceback(score, k, []traceback{Sub, Ins, Del}[nextMTraceback])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
153
pkg/wfa.go
153
pkg/wfa.go
@ -1,24 +1,22 @@
|
|||||||
package wfa
|
package wfa
|
||||||
|
|
||||||
import (
|
|
||||||
"strings"
|
|
||||||
)
|
|
||||||
|
|
||||||
func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result {
|
func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result {
|
||||||
n := len(s1)
|
n := len(s1)
|
||||||
m := len(s2)
|
m := len(s2)
|
||||||
A_k := m - n
|
A_k := m - n
|
||||||
A_offset := uint32(m)
|
A_offset := m
|
||||||
score := 0
|
score := 0
|
||||||
M := NewWavefrontComponent()
|
estimatedScore := (max(n, m) * max(penalties.M, penalties.X, penalties.O, penalties.E)) / 4
|
||||||
|
M := NewWavefrontComponent(estimatedScore)
|
||||||
M.SetLoHi(0, 0, 0)
|
M.SetLoHi(0, 0, 0)
|
||||||
M.SetVal(0, 0, 0, End)
|
M.SetVal(0, 0, 0)
|
||||||
I := NewWavefrontComponent()
|
M.SetTraceback(0, 0, End)
|
||||||
D := NewWavefrontComponent()
|
I := NewWavefrontComponent(estimatedScore)
|
||||||
|
D := NewWavefrontComponent(estimatedScore)
|
||||||
|
|
||||||
for {
|
for {
|
||||||
WFExtend(M, s1, n, s2, m, score)
|
WFExtend(M, s1, n, s2, m, score)
|
||||||
ok, val, _ := M.GetVal(score, A_k)
|
ok, val := M.GetVal(score, A_k)
|
||||||
if ok && val >= A_offset {
|
if ok && val >= A_offset {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
@ -28,7 +26,7 @@ func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result {
|
|||||||
|
|
||||||
CIGAR := ""
|
CIGAR := ""
|
||||||
if doCIGAR {
|
if doCIGAR {
|
||||||
CIGAR = WFBacktrace(M, I, D, score, penalties, A_k, A_offset, s1, s2)
|
CIGAR = WFBacktrace(M, I, D, score, penalties, A_k, s1, s2)
|
||||||
}
|
}
|
||||||
|
|
||||||
return Result{
|
return Result{
|
||||||
@ -37,13 +35,12 @@ func WFAlign(s1 string, s2 string, penalties Penalty, doCIGAR bool) Result {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func WFExtend(M *WavefrontComponent, s1 string, n int, s2 string, m int, score int) {
|
func WFExtend(M WavefrontComponent, s1 string, n int, s2 string, m int, score int) {
|
||||||
_, lo, hi := M.GetLoHi(score)
|
_, lo, hi := M.GetLoHi(score)
|
||||||
for k := lo; k <= hi; k++ {
|
for k := lo; k <= hi; k++ {
|
||||||
// v = M[score][k] - k
|
// v = M[score][k] - k
|
||||||
// h = M[score][k]
|
// h = M[score][k]
|
||||||
ok, hu, _ := M.GetVal(score, k)
|
ok, h := M.GetVal(score, k)
|
||||||
h := int(hu)
|
|
||||||
v := h - k
|
v := h - k
|
||||||
|
|
||||||
// exit early if v or h are invalid
|
// exit early if v or h are invalid
|
||||||
@ -51,15 +48,15 @@ func WFExtend(M *WavefrontComponent, s1 string, n int, s2 string, m int, score i
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
for v < n && h < m && s1[v] == s2[h] {
|
for v < n && h < m && s1[v] == s2[h] {
|
||||||
_, val, tb := M.GetVal(score, k)
|
_, val := M.GetVal(score, k)
|
||||||
M.SetVal(score, k, val+1, tb)
|
M.SetVal(score, k, val+1)
|
||||||
v++
|
v++
|
||||||
h++
|
h++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func WFNext(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent, score int, penalties Penalty) {
|
func WFNext(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, penalties Penalty) {
|
||||||
// get this score's lo, hi
|
// get this score's lo, hi
|
||||||
lo, hi := NextLoHi(M, I, D, score, penalties)
|
lo, hi := NextLoHi(M, I, D, score, penalties)
|
||||||
|
|
||||||
@ -70,128 +67,76 @@ func WFNext(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func WFBacktrace(M *WavefrontComponent, I *WavefrontComponent, D *WavefrontComponent, score int, penalties Penalty, A_k int, A_offset uint32, s1 string, s2 string) string {
|
func WFBacktrace(M WavefrontComponent, I WavefrontComponent, D WavefrontComponent, score int, penalties Penalty, A_k int, s1 string, s2 string) string {
|
||||||
|
traceback_CIGAR := []string{"I", "I", "D", "D", "X", "", "", ""}
|
||||||
x := penalties.X
|
x := penalties.X
|
||||||
o := penalties.O
|
o := penalties.O
|
||||||
e := penalties.E
|
e := penalties.E
|
||||||
|
CIGAR_rev := ""
|
||||||
tb_s := score
|
tb_s := score
|
||||||
tb_k := A_k
|
tb_k := A_k
|
||||||
|
_, current_traceback := M.GetTraceback(tb_s, tb_k)
|
||||||
done := false
|
done := false
|
||||||
|
|
||||||
_, current_dist, current_traceback := M.GetVal(tb_s, tb_k)
|
|
||||||
|
|
||||||
Ops := []rune{'~'}
|
|
||||||
Counts := []uint{0}
|
|
||||||
idx := 0
|
|
||||||
|
|
||||||
for !done {
|
for !done {
|
||||||
|
CIGAR_rev = CIGAR_rev + traceback_CIGAR[current_traceback]
|
||||||
switch current_traceback {
|
switch current_traceback {
|
||||||
case OpenIns:
|
case OpenIns:
|
||||||
if Ops[idx] == 'I' {
|
|
||||||
Counts[idx]++
|
|
||||||
} else {
|
|
||||||
Ops = append(Ops, 'I')
|
|
||||||
Counts = append(Counts, 1)
|
|
||||||
idx++
|
|
||||||
}
|
|
||||||
|
|
||||||
tb_s = tb_s - o - e
|
tb_s = tb_s - o - e
|
||||||
tb_k = tb_k - 1
|
tb_k = tb_k - 1
|
||||||
_, current_dist, current_traceback = M.GetVal(tb_s, tb_k)
|
_, current_traceback = M.GetTraceback(tb_s, tb_k)
|
||||||
case ExtdIns:
|
case ExtdIns:
|
||||||
if Ops[idx] == 'I' {
|
|
||||||
Counts[idx]++
|
|
||||||
} else {
|
|
||||||
Ops = append(Ops, 'I')
|
|
||||||
Counts = append(Counts, 1)
|
|
||||||
idx++
|
|
||||||
}
|
|
||||||
|
|
||||||
tb_s = tb_s - e
|
tb_s = tb_s - e
|
||||||
tb_k = tb_k - 1
|
tb_k = tb_k - 1
|
||||||
_, current_dist, current_traceback = I.GetVal(tb_s, tb_k)
|
_, current_traceback = I.GetTraceback(tb_s, tb_k)
|
||||||
case OpenDel:
|
case OpenDel:
|
||||||
if Ops[idx] == 'D' {
|
|
||||||
Counts[idx]++
|
|
||||||
} else {
|
|
||||||
Ops = append(Ops, 'D')
|
|
||||||
Counts = append(Counts, 1)
|
|
||||||
idx++
|
|
||||||
}
|
|
||||||
|
|
||||||
tb_s = tb_s - o - e
|
tb_s = tb_s - o - e
|
||||||
tb_k = tb_k + 1
|
tb_k = tb_k + 1
|
||||||
_, current_dist, current_traceback = M.GetVal(tb_s, tb_k)
|
_, current_traceback = M.GetTraceback(tb_s, tb_k)
|
||||||
case ExtdDel:
|
case ExtdDel:
|
||||||
if Ops[idx] == 'D' {
|
|
||||||
Counts[idx]++
|
|
||||||
} else {
|
|
||||||
Ops = append(Ops, 'D')
|
|
||||||
Counts = append(Counts, 1)
|
|
||||||
idx++
|
|
||||||
}
|
|
||||||
|
|
||||||
tb_s = tb_s - e
|
tb_s = tb_s - e
|
||||||
tb_k = tb_k + 1
|
tb_k = tb_k + 1
|
||||||
_, current_dist, current_traceback = D.GetVal(tb_s, tb_k)
|
_, current_traceback = D.GetTraceback(tb_s, tb_k)
|
||||||
case Sub:
|
case Sub:
|
||||||
tb_s = tb_s - x
|
tb_s = tb_s - x
|
||||||
// tb_k = tb_k;
|
// tb_k = tb_k;
|
||||||
_, next_dist, next_traceback := M.GetVal(tb_s, tb_k)
|
_, current_traceback = M.GetTraceback(tb_s, tb_k)
|
||||||
|
|
||||||
if int(current_dist-next_dist)-1 > 0 {
|
|
||||||
Ops = append(Ops, 'M')
|
|
||||||
Counts = append(Counts, uint(current_dist-next_dist)-1)
|
|
||||||
idx++
|
|
||||||
}
|
|
||||||
|
|
||||||
if Ops[idx] == 'X' {
|
|
||||||
Counts[idx]++
|
|
||||||
} else {
|
|
||||||
Ops = append(Ops, 'X')
|
|
||||||
Counts = append(Counts, 1)
|
|
||||||
idx++
|
|
||||||
}
|
|
||||||
|
|
||||||
current_dist = next_dist
|
|
||||||
current_traceback = next_traceback
|
|
||||||
case Ins:
|
case Ins:
|
||||||
// tb_s = tb_s;
|
// tb_s = tb_s;
|
||||||
// tb_k = tb_k;
|
// tb_k = tb_k;
|
||||||
_, next_dist, next_traceback := I.GetVal(tb_s, tb_k)
|
_, current_traceback = I.GetTraceback(tb_s, tb_k)
|
||||||
|
|
||||||
Ops = append(Ops, 'M')
|
|
||||||
Counts = append(Counts, uint(current_dist-next_dist))
|
|
||||||
idx++
|
|
||||||
|
|
||||||
current_dist = next_dist
|
|
||||||
current_traceback = next_traceback
|
|
||||||
case Del:
|
case Del:
|
||||||
// tb_s = tb_s;
|
// tb_s = tb_s;
|
||||||
// tb_k = tb_k;
|
// tb_k = tb_k;
|
||||||
_, next_dist, next_traceback := D.GetVal(tb_s, tb_k)
|
_, current_traceback = D.GetTraceback(tb_s, tb_k)
|
||||||
|
|
||||||
Ops = append(Ops, 'M')
|
|
||||||
Counts = append(Counts, uint(current_dist-next_dist))
|
|
||||||
idx++
|
|
||||||
|
|
||||||
current_dist = next_dist
|
|
||||||
current_traceback = next_traceback
|
|
||||||
case End:
|
case End:
|
||||||
Ops = append(Ops, 'M')
|
|
||||||
Counts = append(Counts, uint(current_dist))
|
|
||||||
idx++
|
|
||||||
|
|
||||||
done = true
|
done = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
CIGAR := strings.Builder{}
|
CIGAR_part := Reverse(CIGAR_rev)
|
||||||
for i := len(Ops) - 1; i > 0; i-- {
|
c := 0
|
||||||
CIGAR.WriteString(UIntToString(Counts[i]))
|
i := 0
|
||||||
CIGAR.WriteRune(Ops[i])
|
j := 0
|
||||||
|
for i < len(s1) && j < len(s2) {
|
||||||
|
if s1[i] == s2[j] {
|
||||||
|
//CIGAR_part.splice(c, 0, "M")
|
||||||
|
CIGAR_part = Splice(CIGAR_part, 'M', c)
|
||||||
|
c++
|
||||||
|
i++
|
||||||
|
j++
|
||||||
|
} else if CIGAR_part[c] == 'X' {
|
||||||
|
c++
|
||||||
|
i++
|
||||||
|
j++
|
||||||
|
} else if CIGAR_part[c] == 'I' {
|
||||||
|
c++
|
||||||
|
j++
|
||||||
|
} else if CIGAR_part[c] == 'D' {
|
||||||
|
c++
|
||||||
|
i++
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return CIGAR.String()
|
return CIGAR_part
|
||||||
}
|
}
|
||||||
|
122
test/wfa_test.go
122
test/wfa_test.go
@ -3,8 +3,6 @@ package tests
|
|||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"log"
|
|
||||||
"math/rand/v2"
|
|
||||||
"os"
|
"os"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
@ -12,7 +10,6 @@ import (
|
|||||||
wfa "wfa/pkg"
|
wfa "wfa/pkg"
|
||||||
|
|
||||||
"github.com/schollz/progressbar/v3"
|
"github.com/schollz/progressbar/v3"
|
||||||
"golang.org/x/exp/constraints"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const testJsonPath = "tests.json"
|
const testJsonPath = "tests.json"
|
||||||
@ -30,103 +27,6 @@ type TestCase struct {
|
|||||||
Solutions string `json:"solutions"`
|
Solutions string `json:"solutions"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func randRange[T constraints.Integer](min, max int) T {
|
|
||||||
return T(rand.IntN(max-min) + min)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestWavefrontPacking(t *testing.T) {
|
|
||||||
for range 1000 {
|
|
||||||
val := randRange[uint32](0, 1000)
|
|
||||||
tb := wfa.Traceback(randRange[uint32](0, 7))
|
|
||||||
v := wfa.PackWavefrontValue(val, tb)
|
|
||||||
|
|
||||||
valid, gotVal, gotTB := wfa.UnpackWavefrontValue(v)
|
|
||||||
|
|
||||||
if !valid || gotVal != val || gotTB != tb {
|
|
||||||
t.Errorf(`test WavefrontPack/Unpack, val: %d, tb: %d, packedval: %x, gotok: %t, gotval: %d, gottb: %d\n`, val, tb, v, valid, gotVal, gotTB)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestLoHiPacking(t *testing.T) {
|
|
||||||
for range 1000 {
|
|
||||||
lo := randRange[int](-1000, 1000)
|
|
||||||
hi := randRange[int](-1000, 1000)
|
|
||||||
v := wfa.PackWavefrontLoHi(lo, hi)
|
|
||||||
|
|
||||||
gotLo, gotHi := wfa.UnpackWavefrontLoHi(v)
|
|
||||||
|
|
||||||
if gotLo != lo || gotHi != hi {
|
|
||||||
t.Errorf(`test WavefrontPack/Unpack, lo: %d, hi: %d, packedval: %x, gotlo: %d, gothi: %d`, lo, hi, v, gotLo, gotHi)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func GetScoreFromCIGAR(CIGAR string, penalties wfa.Penalty) int {
|
|
||||||
unpackedCIGAR := wfa.RunLengthDecode(CIGAR)
|
|
||||||
previousOp := '~'
|
|
||||||
score := 0
|
|
||||||
for _, Op := range unpackedCIGAR {
|
|
||||||
if Op == 'M' {
|
|
||||||
score = score + penalties.M
|
|
||||||
} else if Op == 'X' {
|
|
||||||
score = score + penalties.X
|
|
||||||
} else if (Op == 'I' && previousOp != 'I') || (Op == 'D' && previousOp != 'D') {
|
|
||||||
score = score + penalties.O + penalties.E
|
|
||||||
} else if (Op == 'I' && previousOp == 'I') || (Op == 'D' && previousOp == 'D') {
|
|
||||||
score = score + penalties.E
|
|
||||||
}
|
|
||||||
previousOp = Op
|
|
||||||
}
|
|
||||||
return score
|
|
||||||
}
|
|
||||||
|
|
||||||
func CheckCIGARCorrectness(s1 string, s2 string, CIGAR string) bool {
|
|
||||||
unpackedCIGAR := wfa.RunLengthDecode(CIGAR)
|
|
||||||
i := 0
|
|
||||||
j := 0
|
|
||||||
|
|
||||||
s1Aligned := strings.Builder{}
|
|
||||||
alignment := strings.Builder{}
|
|
||||||
s2Aligned := strings.Builder{}
|
|
||||||
|
|
||||||
for c := 0; c < len(unpackedCIGAR); c++ {
|
|
||||||
Op := unpackedCIGAR[c]
|
|
||||||
if Op == 'M' {
|
|
||||||
s1Aligned.WriteByte(s1[i])
|
|
||||||
alignment.WriteRune('|')
|
|
||||||
s2Aligned.WriteByte(s2[j])
|
|
||||||
i++
|
|
||||||
j++
|
|
||||||
} else if Op == 'X' {
|
|
||||||
s1Aligned.WriteByte(s1[i])
|
|
||||||
alignment.WriteRune(' ')
|
|
||||||
s2Aligned.WriteByte(s2[j])
|
|
||||||
i++
|
|
||||||
j++
|
|
||||||
} else if Op == 'I' {
|
|
||||||
|
|
||||||
s1Aligned.WriteRune('-')
|
|
||||||
alignment.WriteRune(' ')
|
|
||||||
s2Aligned.WriteByte(s2[j])
|
|
||||||
|
|
||||||
j++
|
|
||||||
} else if Op == 'D' {
|
|
||||||
s1Aligned.WriteByte(s1[i])
|
|
||||||
alignment.WriteRune('|')
|
|
||||||
s2Aligned.WriteRune('-')
|
|
||||||
i++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if i == len(s1) && j == len(s2) {
|
|
||||||
return true
|
|
||||||
} else {
|
|
||||||
log.Printf("\n%s\n%s\n%s\n i=%d, j=%d, |s1|=%d, |s2|=%d\n", s1Aligned.String(), alignment.String(), s2Aligned.String(), i, j, len(s1), len(s2))
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestWFA(t *testing.T) {
|
func TestWFA(t *testing.T) {
|
||||||
content, _ := os.ReadFile(testJsonPath)
|
content, _ := os.ReadFile(testJsonPath)
|
||||||
|
|
||||||
@ -154,9 +54,7 @@ func TestWFA(t *testing.T) {
|
|||||||
|
|
||||||
for solutions.Scan() {
|
for solutions.Scan() {
|
||||||
solution := solutions.Text()
|
solution := solutions.Text()
|
||||||
|
|
||||||
expectedScore, _ := strconv.Atoi(strings.Split(solution, "\t")[0])
|
expectedScore, _ := strconv.Atoi(strings.Split(solution, "\t")[0])
|
||||||
expectedCIGAR := strings.Split(solution, "\t")[1]
|
|
||||||
|
|
||||||
sequences.Scan()
|
sequences.Scan()
|
||||||
s1 := sequences.Text()
|
s1 := sequences.Text()
|
||||||
@ -166,27 +64,11 @@ func TestWFA(t *testing.T) {
|
|||||||
s2 := sequences.Text()
|
s2 := sequences.Text()
|
||||||
s2 = s2[1:]
|
s2 = s2[1:]
|
||||||
|
|
||||||
x := wfa.WFAlign(s1, s2, testPenalties, true)
|
x := wfa.WFAlign(s1, s2, testPenalties, false)
|
||||||
gotScore := x.Score
|
gotScore := x.Score
|
||||||
gotCIGAR := x.CIGAR
|
|
||||||
|
|
||||||
if gotScore != -1*expectedScore {
|
if gotScore != -1*expectedScore {
|
||||||
t.Errorf(`test: %s#%d, s1: %s, s2: %s, got: %d, expected: %d`, testName, idx, s1, s2, gotScore, expectedScore)
|
t.Errorf(`test: %s#%d, s1: %s, s2: %s, got: %d, expected: %d\n`, testName, idx, s1, s2, gotScore, expectedScore)
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
|
|
||||||
if gotCIGAR != expectedCIGAR {
|
|
||||||
checkScore := GetScoreFromCIGAR(gotCIGAR, testPenalties)
|
|
||||||
CIGARCorrectness := CheckCIGARCorrectness(s1, s2, gotCIGAR)
|
|
||||||
if checkScore != gotScore && checkScore != -1*expectedScore { // nonequivalent alignment
|
|
||||||
t.Errorf(`test: %s#%d, s1: %s, s2: %s, got: [%s], expected: [%s]`, testName, idx, s1, s2, gotCIGAR, expectedCIGAR)
|
|
||||||
t.Errorf(`test: %s#%d, recalculated score: %d`, testName, idx, checkScore)
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
if !CIGARCorrectness {
|
|
||||||
t.Errorf(`test: %s#%d, s1: %s, s2: %s, got: [%s], expected: [%s]`, testName, idx, s1, s2, gotCIGAR, expectedCIGAR)
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
idx++
|
idx++
|
||||||
|
12
wfa.js
12
wfa.js
@ -300,14 +300,10 @@
|
|||||||
|
|
||||||
// func finalizeRef(v ref)
|
// func finalizeRef(v ref)
|
||||||
"syscall/js.finalizeRef": (v_ref) => {
|
"syscall/js.finalizeRef": (v_ref) => {
|
||||||
const id = mem().getUint32(unboxValue(v_ref), true);
|
// Note: TinyGo does not support finalizers so this should never be
|
||||||
this._goRefCounts[id]--;
|
// called.
|
||||||
if (this._goRefCounts[id] === 0) {
|
//console.error('syscall/js.finalizeRef not implemented');
|
||||||
const v = this._values[id];
|
// for whatever reason this is called by wfajs but doesnt impact the results at all??
|
||||||
this._values[id] = null;
|
|
||||||
this._ids.delete(v);
|
|
||||||
this._idPool.push(id);
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
|
|
||||||
// func stringVal(value string) ref
|
// func stringVal(value string) ref
|
||||||
|
Loading…
Reference in New Issue
Block a user