go中常见字符串的拼接方式

  • +加号拼接
  • fmt.Sprintf()格式化字符串方式
  • Append
  • bytes.Buffer
  • Join
  • strings.Builder

不同方式性能测试

测试使用的环境

  • go 1.17.9
  • 16g mem, 4cpu
  • cpu: Intel(R) Core(TM) i7-8559U CPU @ 2.70GHz

六种拼接方式的代码实现

package main

import (
	"bytes"
	"fmt"
	"strings"
)

// StringAdd concatenates keys in order using the + operator and returns the
// combined string. It returns the empty string when called with no arguments.
func StringAdd(keys ...string) string {
	joined := ""
	for _, key := range keys {
		joined = joined + key
	}
	return joined
}

// StringPrintf concatenates keys in order by repeatedly formatting the
// accumulated result together with the next key through fmt.Sprintf.
// It returns the empty string when called with no arguments.
func StringPrintf(keys ...string) string {
	acc := ""
	for _, key := range keys {
		acc = fmt.Sprintf("%s%s", acc, key)
	}
	return acc
}

// StringJoin concatenates keys in order by delegating to strings.Join with an
// empty separator.
func StringJoin(keys ...string) string {
	const noSeparator = ""
	joined := strings.Join(keys, noSeparator)
	return joined
}

// StringAppend concatenates keys in order by appending the bytes of each key
// to a byte slice and converting the slice to a string at the end.
func StringAppend(keys ...string) string {
	var buf []byte
	for _, key := range keys {
		buf = append(buf, key...)
	}
	return string(buf)
}

// BytesBuf concatenates keys in order by writing each key into a
// bytes.Buffer and returning the buffer's contents as a string.
func BytesBuf(keys ...string) string {
	var buf bytes.Buffer
	for _, key := range keys {
		buf.WriteString(key)
	}
	return buf.String()
}

// StringBuilder concatenates keys in order by writing each key into a
// strings.Builder, without reserving any capacity up front.
func StringBuilder(keys ...string) string {
	var sb strings.Builder
	for _, key := range keys {
		sb.WriteString(key)
	}
	return sb.String()
}

// StringBuilderWithGrow concatenates keys in order using a strings.Builder
// whose capacity is reserved in advance: it sums the byte lengths of all keys
// and calls Grow with that total before writing any of them.
func StringBuilderWithGrow(keys ...string) string {
	total := 0
	for i := range keys {
		total += len(keys[i])
	}

	var sb strings.Builder
	sb.Grow(total)
	for _, key := range keys {
		sb.WriteString(key)
	}
	return sb.String()
}

benchmark 代码实现


// Small test set. It is commented out because only one testData declaration
// can be active at a time.
//var testData = []string{
// 	"hello",
// 	"world",
// 	"golang",
// 	"string",
// }

// Large test set: the input passed to every benchmark below.
var testData = []string{
	"xZDZf4VpDR+bFl9roB8bSL8XTrmQx1A2WHe9egrsmy+JMUGlA/f5tTcFNw==",
	"hm9XZujVO/1PDpQpdboVS8B5BpHn22eXqqudt5ziwEkLFuWeV/WQLHEu8Q==",
	"HUHgtMMD6rKUdu5cKVcL5iQYPuyfutCr5sk2bo+aUdk/wUU5w/o1eGQb0g==",
	"+u9o4oZDVcnyP/fZV4hHozAMp6oP7LcXp54n6d3/rh4hMaTYlEr5eReSjA==",
}

// BenchmarkStringAdd runs StringAdd over testData b.N times.
func BenchmarkStringAdd(b *testing.B) {
	var out string
	for i := 0; i < b.N; i++ {
		out = StringAdd(testData...)
	}
	// Reference the last result so the variable counts as used.
	_ = out
}

// BenchmarkStringPrintf runs StringPrintf over testData b.N times.
func BenchmarkStringPrintf(b *testing.B) {
	var out string
	for i := 0; i < b.N; i++ {
		out = StringPrintf(testData...)
	}
	// Reference the last result so the variable counts as used.
	_ = out
}

// BenchmarkStringAppend runs StringAppend over testData b.N times.
func BenchmarkStringAppend(b *testing.B) {
	var out string
	for i := 0; i < b.N; i++ {
		out = StringAppend(testData...)
	}
	// Reference the last result so the variable counts as used.
	_ = out
}

// BenchmarkStringJoin runs StringJoin over testData b.N times.
func BenchmarkStringJoin(b *testing.B) {
	var out string
	for i := 0; i < b.N; i++ {
		out = StringJoin(testData...)
	}
	// Reference the last result so the variable counts as used.
	_ = out
}

// BenchmarkBytesBuf runs BytesBuf over testData b.N times.
func BenchmarkBytesBuf(b *testing.B) {
	var out string
	for i := 0; i < b.N; i++ {
		out = BytesBuf(testData...)
	}
	// Reference the last result so the variable counts as used.
	_ = out
}

// BenchmarkStringBuilder runs StringBuilder over testData b.N times.
func BenchmarkStringBuilder(b *testing.B) {
	var out string
	for i := 0; i < b.N; i++ {
		out = StringBuilder(testData...)
	}
	// Reference the last result so the variable counts as used.
	_ = out
}

// BenchmarkStringBuilderWithGrow runs StringBuilderWithGrow over testData
// b.N times.
func BenchmarkStringBuilderWithGrow(b *testing.B) {
	var out string
	for i := 0; i < b.N; i++ {
		out = StringBuilderWithGrow(testData...)
	}
	// Reference the last result so the variable counts as used.
	_ = out
}

运行命令:go test -bench=. -benchmem,参数含义如下:

  • count 基准测试的运行次数(可选,例如 -count=3;上面的命令没有指定,默认运行 1 次)
  • benchmem 输出内存分配统计:每次操作分配的字节数(B/op)和内存分配次数(allocs/op)

指标解读:

  • 36.73 ns/op 每次操作需要36.73 ns
  • 80 B/op每次操作需要消耗80 B内存
  • 2 allocs/op每次操作需要2次内存分配

测试结果

小的测试数据集

goos: darwin
goarch: amd64
pkg: demo
cpu: Intel(R) Core(TM) i7-8559U CPU @ 2.70GHz
BenchmarkStringAdd-8                     7511257               183.0 ns/op            56 B/op          3 allocs/op
BenchmarkStringPrintf-8                  1481868               814.0 ns/op           168 B/op         11 allocs/op
BenchmarkStringAppend-8                  7054836               169.0 ns/op            80 B/op          4 allocs/op
BenchmarkStringJoin-8                   16338451                70.08 ns/op           24 B/op          1 allocs/op
BenchmarkBytesBuf-8                     10691223               115.3 ns/op            88 B/op          2 allocs/op
BenchmarkStringBuilder-8                 8440689               134.7 ns/op            56 B/op          3 allocs/op
BenchmarkStringBuilderWithGrow-8        20315769                58.88 ns/op           24 B/op          1 allocs/op
PASS
ok      demo    13.536s

大的测试数据

goos: darwin
goarch: amd64
pkg: demo
cpu: Intel(R) Core(TM) i7-8559U CPU @ 2.70GHz
BenchmarkStringAdd-8                     4541486               257.5 ns/op           560 B/op          3 allocs/op
BenchmarkStringPrintf-8                  1234602              1016 ns/op             736 B/op         11 allocs/op
BenchmarkStringAppend-8                  3915674               284.6 ns/op           688 B/op          4 allocs/op
BenchmarkStringJoin-8                   10637025               113.0 ns/op           240 B/op          1 allocs/op
BenchmarkBytesBuf-8                      3347361               350.7 ns/op           944 B/op          4 allocs/op
BenchmarkStringBuilder-8                 5446124               212.0 ns/op           448 B/op          3 allocs/op
BenchmarkStringBuilderWithGrow-8        10750369               103.7 ns/op           240 B/op          1 allocs/op
PASS

结果分析

+

Go 中的字符串是不可变的,使用+操作符进行拼接时,每次都会开辟一块新的内存空间,并把原来的两个字符串拷贝进去;在循环中拼接就会产生多次内存分配和拷贝。

Sprintf

这个是最不推荐的方式,内部使用了反射,效率是最差的(上面的测试结果也印证了这一点)。

Append

Join

对于固定的字符串集合进行拼接表现比较好,因为Join内部是通过Builder实现的,并且在实现中做了优化:提前计算出所需要的内存,一次性分配好。

// Join concatenates the elements of its first argument to create a single string. The separator
// string sep is placed between elements in the resulting string.
func Join(elems []string, sep string) string {
	// Zero or one element needs no Builder: return the result directly.
	switch len(elems) {
	case 0:
		return ""
	case 1:
		return elems[0]
	}
	// n is the exact byte length of the result: all separators plus all elements.
	n := len(sep) * (len(elems) - 1)
	for i := 0; i < len(elems); i++ {
		n += len(elems[i])
	}

	var b Builder
	b.Grow(n) // reserve the full result size up front so the writes below need no further growth
	b.WriteString(elems[0])
	for _, s := range elems[1:] {
		b.WriteString(sep)
		b.WriteString(s)
	}
	return b.String()
}

strings.Builder

拼接字符串最合理的方式:对于固定的字符串集合,提前分配好内存后可以达到与Join相同的效率;对于不确定的字符串集合,Builder无疑是最佳的选择。

// Builder is the strings.Builder struct: a byte slice that accumulates the
// string being built, plus a self-pointer used to detect copies.
type Builder struct {
    addr *Builder // of receiver, to detect copies by value
    buf  []byte // bytes written so far; WriteString appends here and String returns them
}

// The addr field exists mainly for the copy check, and buf is the byte slice
// that holds the string content.
//
// WriteString appends the contents of s to buf. It returns the length of s
// and a nil error.
func (b *Builder) WriteString(s string) (int, error) {
    b.copyCheck()
    b.buf = append(b.buf, s...)
    return len(s), nil
}

// String returns the accumulated string. To avoid a memory copy, it
// reinterprets the []byte buffer as a string through an unsafe pointer cast
// instead of converting it.
func (b *Builder) String() string {
    return *(*string)(unsafe.Pointer(&b.buf))
}

bytes.Buffer

// Buffer is the bytes.Buffer struct: a byte slice plus a read offset and the
// kind of the last read operation.
type Buffer struct {
    buf      []byte // contents are the bytes buf[off : len(buf)]
    off      int    // read at &buf[off], write at &buf[len(buf)]
    lastRead readOp // last read operation, so that Unread* can work correctly.
}

// WriteString copies s to the end of the buffer and returns the number of
// bytes copied with a nil error.
//
// Notes from the article on the bytes package implementation: the slice gets
// no memory when the Buffer is created; memory is requested only on the first
// write, sized to the data being written, or to 64 bytes if the data is
// smaller than that. After that the slice grows dynamically. The appended
// part is copied to the tail with the built-in copy function, which can
// reduce memory allocations.
func (b *Buffer) WriteString(s string) (n int, err error) {
    b.lastRead = opInvalid
    // Try to make room by reslicing within the existing capacity first.
    m, ok := b.tryGrowByReslice(len(s))
    if !ok {
        // Not enough spare capacity: fall back to grow.
        m = b.grow(len(s))
    }
    return copy(b.buf[m:], s), nil
}

// String returns the unread portion of the buffer, buf[off:], as a string.
// The []byte-to-string step is the ordinary string(...) conversion, so it
// allocates memory for the result. A nil *Buffer yields "<nil>".
func (b *Buffer) String() string {
    if b == nil {
        // Special case, useful in debugging.
        return "<nil>"
    }
    return string(b.buf[b.off:])
}