go
中常见字符串的拼接方式
+:加号拼接
fmt.Sprintf():格式化字符串方式
Append
bytes.Buffer
Join
strings.Builder
不同方式性能测试
测试使用的环境
go 1.17.9
16g mem, 4cpu
cpu: Intel(R) Core(TM) i7-8559U CPU @ 2.70GHz
六种拼接方式的代码实现
package main
import (
"bytes"
"fmt"
"strings"
)
// StringAdd concatenates keys in order using the += operator and returns the
// combined string. Called with no arguments it returns the empty string.
func StringAdd(keys ...string) string {
	joined := ""
	for _, key := range keys {
		joined += key
	}
	return joined
}
// StringPrintf concatenates keys in order by repeatedly formatting the
// accumulated string and the next key through fmt.Sprintf with "%s%s".
// Called with no arguments it returns the empty string.
func StringPrintf(keys ...string) string {
	acc := ""
	for _, key := range keys {
		acc = fmt.Sprintf("%s%s", acc, key)
	}
	return acc
}
// StringJoin concatenates keys in order with no separator by delegating to
// strings.Join.
func StringJoin(keys ...string) string {
	const sep = ""
	joined := strings.Join(keys, sep)
	return joined
}
// StringAppend concatenates keys in order by appending the bytes of each key
// to a byte slice that starts out nil, then converting the slice to a string.
func StringAppend(keys ...string) string {
	var buf []byte
	for _, key := range keys {
		buf = append(buf, key...)
	}
	return string(buf)
}
// BytesBuf concatenates keys in order by writing each key into a zero-value
// bytes.Buffer and returning the buffer's contents as a string.
func BytesBuf(keys ...string) string {
	var buf bytes.Buffer
	for _, key := range keys {
		buf.WriteString(key)
	}
	return buf.String()
}
// StringBuilder concatenates keys in order by writing each key into a
// zero-value strings.Builder, without reserving any capacity beforehand.
func StringBuilder(keys ...string) string {
	var sb strings.Builder
	for _, key := range keys {
		sb.WriteString(key)
	}
	return sb.String()
}
// StringBuilderWithGrow concatenates keys in order using a strings.Builder
// whose capacity is reserved in advance: it sums the byte lengths of all keys
// and passes that total to Grow before writing anything.
func StringBuilderWithGrow(keys ...string) string {
	total := 0
	for i := 0; i < len(keys); i++ {
		total += len(keys[i])
	}

	var sb strings.Builder
	sb.Grow(total)
	for _, key := range keys {
		sb.WriteString(key)
	}
	return sb.String()
}
benchmark代码实现
// Small test set (commented out; swap it in for the large set below to
// benchmark short inputs):
//var testData = []string{
// "hello",
// "world",
// "golang",
// "string",
// }

// testData is the large test set passed to every benchmark: four strings of
// 60 bytes each.
var testData = []string{
"xZDZf4VpDR+bFl9roB8bSL8XTrmQx1A2WHe9egrsmy+JMUGlA/f5tTcFNw==",
"hm9XZujVO/1PDpQpdboVS8B5BpHn22eXqqudt5ziwEkLFuWeV/WQLHEu8Q==",
"HUHgtMMD6rKUdu5cKVcL5iQYPuyfutCr5sk2bo+aUdk/wUU5w/o1eGQb0g==",
"+u9o4oZDVcnyP/fZV4hHozAMp6oP7LcXp54n6d3/rh4hMaTYlEr5eReSjA==",
}
// BenchmarkStringAdd measures StringAdd by calling it b.N times on testData.
// The most recent return value is held in a local and discarded after the loop.
func BenchmarkStringAdd(b *testing.B) {
	var out string
	for i := 0; i < b.N; i++ {
		out = StringAdd(testData...)
	}
	_ = out
}
// BenchmarkStringPrintf measures StringPrintf by calling it b.N times on
// testData. The most recent return value is held in a local and discarded
// after the loop.
func BenchmarkStringPrintf(b *testing.B) {
	var out string
	for i := 0; i < b.N; i++ {
		out = StringPrintf(testData...)
	}
	_ = out
}
// BenchmarkStringAppend measures StringAppend by calling it b.N times on
// testData. The most recent return value is held in a local and discarded
// after the loop.
func BenchmarkStringAppend(b *testing.B) {
	var out string
	for i := 0; i < b.N; i++ {
		out = StringAppend(testData...)
	}
	_ = out
}
// BenchmarkStringJoin measures StringJoin by calling it b.N times on testData.
// The most recent return value is held in a local and discarded after the loop.
func BenchmarkStringJoin(b *testing.B) {
	var out string
	for i := 0; i < b.N; i++ {
		out = StringJoin(testData...)
	}
	_ = out
}
// BenchmarkBytesBuf measures BytesBuf by calling it b.N times on testData.
// The most recent return value is held in a local and discarded after the loop.
func BenchmarkBytesBuf(b *testing.B) {
	var out string
	for i := 0; i < b.N; i++ {
		out = BytesBuf(testData...)
	}
	_ = out
}
// BenchmarkStringBuilder measures StringBuilder by calling it b.N times on
// testData. The most recent return value is held in a local and discarded
// after the loop.
func BenchmarkStringBuilder(b *testing.B) {
	var out string
	for i := 0; i < b.N; i++ {
		out = StringBuilder(testData...)
	}
	_ = out
}
// BenchmarkStringBuilderWithGrow measures StringBuilderWithGrow by calling it
// b.N times on testData. The most recent return value is held in a local and
// discarded after the loop.
func BenchmarkStringBuilderWithGrow(b *testing.B) {
	var out string
	for i := 0; i < b.N; i++ {
		out = StringBuilderWithGrow(testData...)
	}
	_ = out
}
运行命令:go test -bench=. -benchmem
,参数含义如下:
count
运行次数
benchmem
内存分配次数,每次使用的内存
指标解读:
36.73 ns/op
每次操作需要36.73 ns
80 B/op
每次操作需要消耗80 B内存
2 allocs/op
每次操作需要2次内存分配
测试结果
小的测试数据集
goos: darwin
goarch: amd64
pkg: demo
cpu: Intel(R) Core(TM) i7-8559U CPU @ 2.70GHz
BenchmarkStringAdd-8 7511257 183.0 ns/op 56 B/op 3 allocs/op
BenchmarkStringPrintf-8 1481868 814.0 ns/op 168 B/op 11 allocs/op
BenchmarkStringAppend-8 7054836 169.0 ns/op 80 B/op 4 allocs/op
BenchmarkStringJoin-8 16338451 70.08 ns/op 24 B/op 1 allocs/op
BenchmarkBytesBuf-8 10691223 115.3 ns/op 88 B/op 2 allocs/op
BenchmarkStringBuilder-8 8440689 134.7 ns/op 56 B/op 3 allocs/op
BenchmarkStringBuilderWithGrow-8 20315769 58.88 ns/op 24 B/op 1 allocs/op
PASS
ok demo 13.536s
大的测试数据
goos: darwin
goarch: amd64
pkg: demo
cpu: Intel(R) Core(TM) i7-8559U CPU @ 2.70GHz
BenchmarkStringAdd-8 4541486 257.5 ns/op 560 B/op 3 allocs/op
BenchmarkStringPrintf-8 1234602 1016 ns/op 736 B/op 11 allocs/op
BenchmarkStringAppend-8 3915674 284.6 ns/op 688 B/op 4 allocs/op
BenchmarkStringJoin-8 10637025 113.0 ns/op 240 B/op 1 allocs/op
BenchmarkBytesBuf-8 3347361 350.7 ns/op 944 B/op 4 allocs/op
BenchmarkStringBuilder-8 5446124 212.0 ns/op 448 B/op 3 allocs/op
BenchmarkStringBuilderWithGrow-8 10750369 103.7 ns/op 240 B/op 1 allocs/op
PASS
结果分析
+
使用+
操作符进行拼接时,会对字符串进行遍历,并开辟一个新的空间来存储原来的两个字符串。
Sprintf
这个是最不推荐的方式,内部使用了反射,效率应该是最差的。
Append
Join
对于固定的字符串集合进行拼接表现比较好,因为Join
内部通过Builder
实现的并且在实现中做了优化:(提前计算出所需要的内存,一次性分配好)。
// Join concatenates the elements of its first argument to create a single string. The separator
// string sep is placed between elements in the resulting string.
//
// This is the strings.Join implementation as quoted by the article: it
// computes the exact output length first and grows the Builder once before
// writing anything.
func Join(elems []string, sep string) string {
// Zero or one element is returned directly without touching a Builder.
switch len(elems) {
case 0:
return ""
case 1:
return elems[0]
}
// n is the exact byte length of the result: one separator between each pair
// of adjacent elements, plus the length of every element.
n := len(sep) * (len(elems) - 1)
for i := 0; i < len(elems); i++ {
n += len(elems[i])
}
var b Builder
b.Grow(n) // reserve room for all n bytes before the first write
b.WriteString(elems[0])
for _, s := range elems[1:] {
b.WriteString(sep)
b.WriteString(s)
}
return b.String()
}
strings.Builder
拼接字符串最合理的方式,如果对于固定的字符串集合则与Join
实现相同的效率(提前分配好内存),对于不确定字符串集合,Builder
无疑是最佳的选择
// Builder is the strings.Builder struct as quoted by the article. It holds a
// self-pointer used for copy detection and the byte slice that accumulates
// the written content.
type Builder struct {
addr *Builder // of receiver, to detect copies by value
buf []byte // accumulated content; WriteString appends to this slice
}
// The addr field exists mainly for the copy check; buf is a byte slice that
// stores the string content.
//
// WriteString appends the bytes of s to the buf slice. It returns len(s) and
// a nil error.
func (b *Builder) WriteString(s string) (int, error) {
b.copyCheck()
b.buf = append(b.buf, s...)
return len(s), nil
}
// String returns the accumulated content as a string. To avoid copying the
// bytes, it reinterprets the []byte slice header as a string through
// unsafe.Pointer instead of performing a regular conversion.
func (b *Builder) String() string {
return *(*string)(unsafe.Pointer(&b.buf))
}
bytes.Buffer
// Buffer is the bytes.Buffer struct as quoted by the article. Unread content
// is the tail of buf starting at off; lastRead records the previous read
// operation.
type Buffer struct {
buf []byte // contents are the bytes buf[off : len(buf)]
off int // read at &buf[off], write at &buf[len(buf)]
lastRead readOp // last read operation, so that Unread* can work correctly.
}
// WriteString appends the contents of s to the buffer and returns the number
// of bytes copied with a nil error. It first tries to make room by reslicing
// the existing buf; if that is not possible it calls b.grow. The bytes of s
// are then copied to the tail with the built-in copy.
//
// Author's notes (the growth policy lives in b.grow, which is not shown in
// this excerpt — TODO confirm against the bytes package source): the slice is
// not allocated when the Buffer is created, only on the first write; the
// first allocation is sized to the data written, except that writes smaller
// than 64 bytes allocate 64 bytes; after that the slice grows dynamically.
// Using copy to place the appended bytes is said to reduce allocations.
func (b *Buffer) WriteString(s string) (n int, err error) {
b.lastRead = opInvalid
m, ok := b.tryGrowByReslice(len(s))
if !ok {
m = b.grow(len(s))
}
return copy(b.buf[m:], s), nil
}
// String returns the unread portion of the buffer, buf[off:], as a string.
// The []byte-to-string step is the ordinary conversion, so it allocates
// memory for the result. A nil *Buffer returns "<nil>".
func (b *Buffer) String() string {
if b == nil {
// Special case, useful in debugging.
return "<nil>"
}
return string(b.buf[b.off:])
}