And a recursive idea (edited on Feb 5 '16 to work with `NA`s in the pattern):
# Locate every occurrence of the sequence `pat` as a contiguous run inside `x`.
#
# Arguments:
#   pat  vector holding the pattern; an NA element matches only an NA in `x`.
#   x    vector to search.
#
# Returns an integer vector with the 1-based position in `x` at which each
# match starts; integer(0) when nothing matches or when `pat` is empty.
find_pat <- function(pat, x) {
  # Recursive worker. `acc` holds the positions of `.x` that must match the
  # first element of `.pat`; the survivors are shifted one step to the right
  # and checked against the rest of the pattern.
  ff <- function(.pat, .x, acc) {
    if (length(.pat) == 0L) {
      return(acc)
    }
    head_val <- .pat[[1L]]
    hit <- if (is.na(head_val)) {
      is.na(.x[acc])
    } else {
      head_val == .x[acc]
    }
    # which() drops the NA comparisons produced by NA elements of `.x`.
    Recall(.pat[-1L], .x, acc[which(hit)] + 1L)
  }

  n_pat <- length(pat)
  if (n_pat == 0L) {
    return(integer(0L))
  }
  # Only start positions that leave room for the whole pattern are candidates.
  # Without this bound the worker would index past the end of `x`, which
  # yields NA, and an NA pattern element would accept that as a match.
  starts <- seq_len(max(length(x) - n_pat + 1L, 0L))
  # After consuming the whole pattern every survivor has been shifted n_pat
  # times, so shift back to report the start of each match.
  ff(pat, x, starts) - n_pat
}
# Example calls; the commented line under each call is the printed result.
# `myVector` is not defined in this snippet.
find_pat(1:2, myVector)
#[1] 3 7
find_pat(c(4, 1, 1), myVector)
#[1] 5
find_pat(1:3, myVector)
#integer(0)
find_pat(c(NA, 1), myVector)
#[1] 2
find_pat(c(3, NA), myVector)
#[1] 1
And on a benchmark:
# Check that find_pat() agrees with matchSequence() before timing anything.
# `matchSequence`, `flm`, `s` and `my_vec2` are not defined in this snippet.
all.equal(matchSequence(s, my_vec2), find_pat(s, my_vec2))
#[1] TRUE
# Time the three approaches on the same input; unit = "relative" requests
# relative timings (the recorded output below shows find_pat at 1 throughout).
microbenchmark::microbenchmark(matchSequence(s, my_vec2),
flm(s, my_vec2),
find_pat(s, my_vec2),
unit = "relative")
#Unit: relative
# expr min lq median uq max neval
# matchSequence(s, my_vec2) 2.970888 3.096573 3.068802 3.023167 12.41387 100
# flm(s, my_vec2) 1.140777 1.173043 1.258394 1.280753 12.79848 100
# find_pat(s, my_vec2) 1.000000 1.000000 1.000000 1.000000 1.00000 100
Using larger data:
# Reproducible large input: one million draws (with replacement) from
# NA, 1, 2, 3, searched for a 10-element pattern that contains no NA.
set.seed(911); VEC = sample(c(NA, 1:3), 1e6, TRUE); PAT = c(3, 2, 2, 1, 3, 2, 2, 1, 1, 3)
# Check that find_pat() agrees with matchSequence() before timing anything.
# `matchSequence` and `flm` are not defined in this snippet.
all.equal(matchSequence(PAT, VEC), find_pat(PAT, VEC))
#[1] TRUE
# Same comparison as above, limited to 20 evaluations per expression.
microbenchmark::microbenchmark(matchSequence(PAT, VEC),
flm(PAT, VEC),
find_pat(PAT, VEC),
unit = "relative", times = 20)
#Unit: relative
# expr min lq median uq max neval
# matchSequence(PAT, VEC) 23.106862 20.54601 19.831344 18.677528 12.563634 20
# flm(PAT, VEC) 2.810611 2.51955 2.963352 2.877195 1.728512 20
# find_pat(PAT, VEC) 1.000000 1.00000 1.000000 1.000000 1.000000 20