A regex is a useful tool for checking whether a string contains a particular format. If we have many formats, we might want to check them one after another in a loop. However, have you ever gotten feedback in a code review that this is expensive? Let’s check how expensive it is in this post.
If you want to try it yourself, you can clone my GitHub repository. The code is here.
The strings that we want
This is the example list of strings. We want to check whether a target string contains one of them.
// covers lists the strings to search for inside each target string.
// Every entry is a single uppercase letter, A through Z, repeated twelve times.
var covers = []string{
"AAAAAAAAAAAA",
"BBBBBBBBBBBB",
"CCCCCCCCCCCC",
"DDDDDDDDDDDD",
"EEEEEEEEEEEE",
"FFFFFFFFFFFF",
"GGGGGGGGGGGG",
"HHHHHHHHHHHH",
"IIIIIIIIIIII",
"JJJJJJJJJJJJ",
"KKKKKKKKKKKK",
"LLLLLLLLLLLL",
"MMMMMMMMMMMM",
"NNNNNNNNNNNN",
"OOOOOOOOOOOO",
"PPPPPPPPPPPP",
"QQQQQQQQQQQQ",
"RRRRRRRRRRRR",
"SSSSSSSSSSSS",
"TTTTTTTTTTTT",
"UUUUUUUUUUUU",
"VVVVVVVVVVVV",
"WWWWWWWWWWWW",
"XXXXXXXXXXXX",
"YYYYYYYYYYYY",
"ZZZZZZZZZZZZ",
}
// funcExample is a placeholder showing where the check would live.
// Its body contains no code yet; targetString is the string to be examined.
func funcExample(targetString string) {
// check whether targetString contains one of the entries in covers...
}
However, the matched string must be immediately followed by a dot or a backslash.
There are several ways to check it.
- Create the target regex in a loop
- Create only one regex by concatenating them with |
- Use strings.Contains() instead
Comparing result
This is the list of test strings. Some entries use a dot as the separator and others use a backslash. There are also some strings that do not match anything in the list above.
// targets lists the strings that the benchmarks search through.
// Each entry is a name followed by a dot or a backslash and then a run of
// digits (a Go-escaped `\\` is one backslash). Some names are entries of
// covers; the others match nothing in covers.
var targets = []string{
"AAAAAAAAAAAA.1234567",
"BBBBBBBBBBBB.1234567",
"CCCCCCCCCCCC\\1234567",
"DDDDDDDDDDDD.1234567",
"EEEEEEEEEEEE.1234567",
"FFFFFFFFFFFF.1234567",
"ABDECGRERW3R2.437298",
"SDFAJKLCXVZ.243798",
"GGGGGGGGGGGG.1234567",
"HHHHHHHHHHHH.1234567",
"IIIIIIIIIIII\\1234567",
"WWJYEAEQXDRZ.58739",
"JJJJJJJJJJJJ.1234567",
"KKKKKKKKKKKK.1234567",
"LLLLLLLLLLLL.1234567",
"MMMMMMMMMMMM.1234567",
"BVWYFUOIERFT.193678",
"NNNNNNNNNNNN.1234567",
"OOOOOOOOOOOO.1234567",
"PPPPPPPPPPPP.1234567",
"QQQQQQQQQQQQ\\1234567",
"RRRRRRRRRRRR.1234567",
"EFQUYGVDSFRE.3524967",
"SSSSSSSSSSSS.1234567",
"TTTTTTTTTTTT.1234567",
"UUUUUUUUUUUU.1234567",
"VVVVVVVVVVVV.1234567",
"VDSBHAOVFDSK.0952843",
"WWWWWWWWWWWW.1234567",
"XXXXXXXXXXXX.1234567",
"YYYYYYYYYYYY.1234567",
"ZZZZZZZZZZZZ.1234567",
}
Then, check the benchmark code and the result.
// BenchmarkRegexLoopMustCompile measures building one regex per covers entry
// with regexp.MustCompile and testing it against targets until the first
// match. The regex is rebuilt on every pass, so compilation is part of the
// measured work.
func BenchmarkRegexLoopMustCompile(b *testing.B) {
	for n := 0; n < b.N; n++ {
		for _, cover := range covers {
			// The cover string must be followed by a dot or a backslash.
			re := regexp.MustCompile(cover + `[\.\\]`)

			// Walk targets in order and stop at the first one that matches.
			for t := 0; t < len(targets); t++ {
				if re.MatchString(targets[t]) {
					break
				}
			}
		}
	}
}
// BenchmarkRegexLoopCompile measures compiling a separate regex for every
// covers entry with regexp.Compile and testing it against targets until the
// first match. Compilation happens inside the timed loop, so its cost is part
// of every iteration.
//
// A compile error fails the benchmark through b.Fatalf instead of printing a
// message and returning, so a broken pattern cannot be reported as a passing
// result.
func BenchmarkRegexLoopCompile(b *testing.B) {
	for i := 0; i < b.N; i++ {
		for _, cover := range covers {
			// The cover string must be followed by a dot or a backslash.
			pattern := cover + `[\.\\]`
			re, err := regexp.Compile(pattern)
			if err != nil {
				b.Fatalf("compile %q: %v", pattern, err)
			}
			for _, target := range targets {
				if re.MatchString(target) {
					break
				}
			}
		}
	}
}
// BenchmarkRegexLoopCompileOnlyWithDot measures compiling a separate regex
// for every covers entry, where the pattern accepts only a trailing dot, and
// testing it against targets until the first match. Compilation happens
// inside the timed loop.
//
// A compile error fails the benchmark through b.Fatalf instead of printing a
// message and returning, so a broken pattern cannot be reported as a passing
// result.
func BenchmarkRegexLoopCompileOnlyWithDot(b *testing.B) {
	for i := 0; i < b.N; i++ {
		for _, cover := range covers {
			// The cover string must be followed by a literal dot.
			pattern := cover + `\.`
			re, err := regexp.Compile(pattern)
			if err != nil {
				b.Fatalf("compile %q: %v", pattern, err)
			}
			for _, target := range targets {
				if re.MatchString(target) {
					break
				}
			}
		}
	}
}
// BenchmarkRegexOnce measures building a single regex that is an alternation
// of every covers entry followed by a dot or a backslash, then testing it
// against targets until the first match. The pattern is built and compiled
// inside the timed loop, so one compilation is part of every iteration.
//
// A compile error fails the benchmark through b.Fatalf instead of printing a
// message and returning, so a broken pattern cannot be reported as a passing
// result.
func BenchmarkRegexOnce(b *testing.B) {
	for i := 0; i < b.N; i++ {
		pattern := fmt.Sprintf(`(%s)[\.\\]`, strings.Join(covers, "|"))
		re, err := regexp.Compile(pattern)
		if err != nil {
			b.Fatalf("compile %q: %v", pattern, err)
		}
		for _, target := range targets {
			if re.MatchString(target) {
				break
			}
		}
	}
}
// BenchmarkRegexOnceOnlyWithDot measures building a single regex that is an
// alternation of every covers entry followed by a literal dot, then testing
// it against targets until the first match. The pattern is built and compiled
// inside the timed loop, so one compilation is part of every iteration.
//
// A compile error fails the benchmark through b.Fatalf instead of printing a
// message and returning, so a broken pattern cannot be reported as a passing
// result.
func BenchmarkRegexOnceOnlyWithDot(b *testing.B) {
	for i := 0; i < b.N; i++ {
		pattern := fmt.Sprintf(`(%s)\.`, strings.Join(covers, "|"))
		re, err := regexp.Compile(pattern)
		if err != nil {
			b.Fatalf("compile %q: %v", pattern, err)
		}
		for _, target := range targets {
			if re.MatchString(target) {
				break
			}
		}
	}
}
// BenchmarkRegexContains1 measures a plain strings.Contains search: for each
// target it tries the covers entries in order and stops at the first one
// found. The trailing dot or backslash is not checked here.
func BenchmarkRegexContains1(b *testing.B) {
	for n := 0; n < b.N; n++ {
		for t := range targets {
			haystack := targets[t]
			for c := 0; c < len(covers); c++ {
				if strings.Contains(haystack, covers[c]) {
					break
				}
			}
		}
	}
}
// BenchmarkRegexContains2 measures strings.Contains when the search string
// (a covers entry plus a dot) is formatted with fmt.Sprintf for every
// target/cover pair, so the formatting cost is part of the measurement.
func BenchmarkRegexContains2(b *testing.B) {
	for n := 0; n < b.N; n++ {
		for _, haystack := range targets {
			for _, cover := range covers {
				needle := fmt.Sprintf("%s.", cover)
				if !strings.Contains(haystack, needle) {
					continue
				}
				// First hit for this target; move on to the next one.
				break
			}
		}
	}
}
// BenchmarkRegexContains3 measures strings.Contains when both search strings
// (a covers entry plus a dot, and plus a backslash) are formatted with
// fmt.Sprintf for every target/cover pair before either is searched for.
func BenchmarkRegexContains3(b *testing.B) {
	for n := 0; n < b.N; n++ {
		for _, haystack := range targets {
			for _, cover := range covers {
				// Both strings are built up front, as in the measured scenario.
				dotted := fmt.Sprintf("%s.", cover)
				slashed := fmt.Sprintf("%s\\", cover)

				if strings.Contains(haystack, dotted) {
					break
				}
				if strings.Contains(haystack, slashed) {
					break
				}
			}
		}
	}
}
// Covers holds the two precomputed search strings derived from one entry of
// covers, so they do not have to be rebuilt for every comparison.
type Covers struct {
withDot string // the entry followed by a dot
withBackSlash string // the entry followed by a backslash
}
// BenchmarkRegexContains4 measures strings.Contains with the search strings
// prepared ahead of time: each covers entry is expanded once into its
// dot-suffixed and backslash-suffixed forms, and only the searching happens
// inside the benchmark loop.
//
// The setup uses plain concatenation and is followed by b.ResetTimer, so the
// one-off preparation is excluded from the reported time and allocations.
func BenchmarkRegexContains4(b *testing.B) {
	coversStruct := make([]Covers, 0, len(covers))
	for _, cover := range covers {
		coversStruct = append(coversStruct, Covers{
			withDot:       cover + ".",
			withBackSlash: cover + "\\",
		})
	}

	// Exclude the preparation above from the measurement.
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		for _, target := range targets {
			for _, coverStruct := range coversStruct {
				if strings.Contains(target, coverStruct.withDot) || strings.Contains(target, coverStruct.withBackSlash) {
					break
				}
			}
		}
	}
}
// $ go test ./benchmark -bench Regex -benchmem
// goos: linux
// goarch: amd64
// pkg: play-with-go-lang/benchmark
// cpu: Intel(R) Core(TM) i7-9850H CPU @ 2.60GHz
// BenchmarkRegexLoopMustCompile-12 14544 82590 ns/op 56624 B/op 598 allocs/op
// BenchmarkRegexLoopCompile-12 14756 79542 ns/op 56656 B/op 598 allocs/op
// BenchmarkRegexLoopCompileOnlyWithDot-12 15729 78042 ns/op 55637 B/op 546 allocs/op
// BenchmarkRegexOnce-12 34831 35230 ns/op 52094 B/op 139 allocs/op
// BenchmarkRegexOnceOnlyWithDot-12 34657 33822 ns/op 52078 B/op 138 allocs/op
// BenchmarkRegexContains1-12 248371 4692 ns/op 0 B/op 0 allocs/op
// BenchmarkRegexContains2-12 19749 60038 ns/op 17799 B/op 1112 allocs/op
// BenchmarkRegexContains3-12 10000 111526 ns/op 32461 B/op 2028 allocs/op
// BenchmarkRegexContains4-12 129026 9165 ns/op 0 B/op 0 allocs/op
// PASS
// ok play-with-go-lang/benchmark 14.569s
strings.Contains() is much faster than using a regex. If we don’t need to extract the matched string with a pattern like xxx(target1|target2)xxx, it’s better to use strings.Contains(). However, the search strings should be created in advance, because fmt.Sprintf() takes a long time to generate a new string, as you can see in the result for BenchmarkRegexContains3. It should not be called in a loop if possible.
If we really need a regex, create a single instance that covers all the cases, as follows.
// Join every covers entry into one alternation group, require a dot or a
// backslash after it, and compile the result into a single regex.
regexString := fmt.Sprintf(`(%s)[\.\\]`, strings.Join(covers, "|"))
regex, err := regexp.Compile(regexString)
Conclusion
It depends on the number of target strings, but creating an instance of a regex and using it in a loop is slower than strings.Contains(). Whenever you reach for a regex, consider whether one of the functions listed below can be used instead.
strings.Contains()
strings.HasPrefix()
strings.HasSuffix()
This change could make the code 10 times faster.
Check the following post if you are not familiar with Regex in Golang.
Comments