golang数组内存分配原理

2022-07-14

编译时数组类型解析

arraytype

数组是内存中一片连续的区域,在声明时需要指定长度,数组的声明有如下三种方式,[...]的方式在编译时会自动推断长度。

var arr1 [3]int
var arr2 = [3]int{1,2,3}
arr3 := [...]int{1,2,3}

在词法及语法解析时,上述三种方式声明的数组会被解析为arraytype。当遇到[...]的声明时,其长度会被标记为nil,并在后续阶段进行自动推断。

// go/src/cmd/compile/internal/syntax/parser.go
func (p *parser) typeornil() expr {
  ...
    pos := p.pos()
    switch p.tok {
    ...
    case _lbrack:
        // '[' oexpr ']' ntype
        // '[' _dotdotdot ']' ntype
        p.next()
        if p.got(_rbrack) {
            return p.slicetype(pos)
        }
        return p.arraytype(pos, nil)
  ...
}
// "[" has already been consumed, and pos is its position.
// if len != nil it is the already consumed array length.
func (p *parser) arraytype(pos pos, len expr) expr {
    ...
    if len == nil && !p.got(_dotdotdot) {
        p.xnest++
        len = p.expr()
        p.xnest--
    }
    ...
    p.want(_rbrack)
    t := new(arraytype)
    t.pos = pos
    t.len = len
    t.elem = p.type_()
    return t
}
// go/src/cmd/compile/internal/syntax/nodes.go
type (
  ...
    // [len]elem
    arraytype struct {
        len  expr // nil means len is ...
        elem expr
        expr
    }
  ...
)

types2.array

在对生成的表达式进行类型检查时,如果是arraytype类型且其长度len == nil,则会初始化一个types2.array并将其长度标记为-1,然后通过check.indexedelts(e.elemlist, utyp.elem, utyp.len)返回数组长度n并赋值给len,完成自动推断。

// go/src/cmd/compile/internal/types2/array.go
// an array represents an array type.
type array struct {
    len  int64
    elem type
}
// go/src/cmd/compile/internal/types2/expr.go
// exprinternal contains the core of type checking of expressions.
// must only be called by rawexpr.
func (check *checker) exprinternal(x *operand, e syntax.expr, hint type) exprkind {
    ...
    switch e := e.(type) {
    ...
    case *syntax.compositelit:
        var typ, base type

        switch {
        case e.type != nil:
            // composite literal type present - use it
            // [...]t array types may only appear with composite literals.
            // check for them here so we don't have to handle ... in general.
            if atyp, _ := e.type.(*syntax.arraytype); atyp != nil && atyp.len == nil {
                // we have an "open" [...]t array type.
                // create a new arraytype with unknown length (-1)
                // and finish setting it up after analyzing the literal.
                typ = &array{len: -1, elem: check.vartype(atyp.elem)}
                base = typ
                break
            }
            typ = check.typ(e.type)
            base = typ
      ...
        }

        switch utyp := coretype(base).(type) {
        ...
        case *array:
            if utyp.elem == nil {
                check.error(e, "illegal cycle in type declaration")
                goto error
            }
            n := check.indexedelts(e.elemlist, utyp.elem, utyp.len)
            // if we have an array of unknown length (usually [...]t arrays, but also
            // arrays [n]t where n is invalid) set the length now that we know it and
            // record the type for the array (usually done by check.typ which is not
            // called for [...]t). we handle [...]t arrays and arrays with invalid
            // length the same here because it makes sense to "guess" the length for
            // the latter if we have a composite literal; e.g. for [n]int{1, 2, 3}
            // where n is invalid for some reason, it seems fair to assume it should
            // be 3 (see also checked.arraylength and issue #27346).
            if utyp.len < 0 {
                utyp.len = n
                // e.type is missing if we have a composite literal element
                // that is itself a composite literal with omitted type. in
                // that case there is nothing to record (there is no type in
                // the source at that point).
                if e.type != nil {
                    check.recordtypeandvalue(e.type, typexpr, utyp, nil)
                }
            }
        ...
        }
    ...
}

types.array

在生成中间表示(IR)时,types2.array最终会通过types.newarray()转换成types.array类型。

// go/src/cmd/compile/internal/noder/types.go
// typ0 converts a types2.type to a types.type, but doesn't do the caching check
// at the top level.
func (g *irgen) typ0(typ types2.type) *types.type {
    switch typ := typ.(type) {
    ...
    case *types2.array:
        return types.newarray(g.typ1(typ.elem()), typ.len())
    ...
}
// go/src/cmd/compile/internal/types/type.go
// array contains type fields specific to array types.
type array struct {
    elem  *type // element type
    bound int64 // number of elements; <0 if unknown yet
}
// newarray returns a new fixed-length array type.
func newarray(elem *type, bound int64) *type {
    if bound < 0 {
        base.fatalf("newarray: invalid bound %v", bound)
    }
    t := newtype(tarray)
    t.extra = &array{elem: elem, bound: bound}
    t.setnotinheap(elem.notinheap())
    if elem.hastparam() {
        t.sethastparam(true)
    }
    if elem.hasshape() {
        t.sethasshape(true)
    }
    return t
}

编译时数组字面量初始化

数组类型解析可以得到数组元素的类型elem以及数组长度bound,而数组字面量的初始化是在编译时类型检查阶段完成的:通过函数tccomplit -> typecheckarraylit遍历字面量中的每个元素,逐个进行类型检查,并通过assignconv将其转换为数组的元素类型。

// go/src/cmd/compile/internal/typecheck/expr.go
func tccomplit(n *ir.complitexpr) (res ir.node) {
    ...
    t := n.type()
    base.assertfat(t != nil, n.pos(), "missing type in composite literal")

    switch t.kind() {
    ...
    case types.tarray:
        typecheckarraylit(t.elem(), t.numelem(), n.list, "array literal")
        n.setop(ir.oarraylit)
    ...

    return n
}
// go/src/cmd/compile/internal/typecheck/typecheck.go
// typecheckarraylit type-checks a sequence of slice/array literal elements.
func typecheckarraylit(elemtype *types.type, bound int64, elts []ir.node, ctx string) int64 {
    ...
    for i, elt := range elts {
        ir.setpos(elt)
        r := elts[i]
        ...
        r = expr(r)
        r = assignconv(r, elemtype, ctx)
        ...
}

编译时数组索引越界检查

在对数组进行索引访问时,如果索引是常量且越界,则在编译时就无法通过检查。

例如:

arr := [...]string{"s1", "s2", "s3"}
e3 := arr[3]
// invalid array index 3 (out of bounds for 3-element array)

数组在类型检查阶段会对访问数组的索引进行验证:

// go/src/cmd/compile/internal/typecheck/typecheck.go
func typecheck1(n ir.node, top int) ir.node {
  ...
    switch n.op() {
  ...
  case ir.oindex:
        n := n.(*ir.indexexpr)
        return tcindex(n)
  ...
  }
}
// go/src/cmd/compile/internal/typecheck/expr.go
func tcindex(n *ir.indexexpr) ir.node {
    ...
    l := n.x
    n.index = expr(n.index)
    r := n.index
    t := l.type()
    ...
    switch t.kind() {
    ...
    case types.tstring, types.tarray, types.tslice:
        n.index = indexlit(n.index)
        if t.isstring() {
            n.settype(types.bytetype)
        } else {
            n.settype(t.elem())
        }
        why := "string"
        if t.isarray() {
            why = "array"
        } else if t.isslice() {
            why = "slice"
        }
        if n.index.type() != nil && !n.index.type().isinteger() {
            base.errorf("non-integer %s index %v", why, n.index)
            return n
        }
        if !n.bounded() && ir.isconst(n.index, constant.int) {
            x := n.index.val()
            if constant.sign(x) < 0 {
                base.errorf("invalid %s index %v (index must be non-negative)", why, n.index)
            } else if t.isarray() && constant.compare(x, token.geq, constant.makeint64(t.numelem())) {
                base.errorf("invalid array index %v (out of bounds for %d-element array)", n.index, t.numelem())
            } else if ir.isconst(n.x, constant.string) && constant.compare(x, token.geq, constant.makeint64(int64(len(ir.stringval(n.x))))) {
                base.errorf("invalid string index %v (out of bounds for %d-byte string)", n.index, len(ir.stringval(n.x)))
            } else if ir.constoverflow(x, types.types[types.tint]) {
                base.errorf("invalid %s index %v (index too large)", why, n.index)
            }
        }
    ...
    }
    return n
}

运行时数组内存分配

数组是内存中一块连续的存储空间。在运行时会通过mallocgc给数组分配具体的存储空间。newarray中如果数组元素刚好只有一个,则申请的空间大小为元素类型的大小typ.size;如果有多个元素,则申请的内存大小为n*typ.size。但这并不一定是实际分配的内存大小,实际分配多少内存取决于mallocgc,涉及到golang的内存分配原理。不过从mallocgc的注释可以看到:如果待分配的对象不超过32KB,mallocgc会从per-P缓存的空闲链表中分配;如果大于32KB,则直接从堆上分配内存空间。

// go/src/runtime/malloc.go
// newarray allocates an array of n elements of type typ.
func newarray(typ *_type, n int) unsafe.pointer {
    if n == 1 {
        return mallocgc(typ.size, typ, true)
    }
    mem, overflow := math.muluintptr(typ.size, uintptr(n))
    if overflow || mem > maxalloc || n < 0 {
        panic(plainerror("runtime: allocation size out of range"))
    }
    return mallocgc(mem, typ, true)
}
// allocate an object of size bytes.
// small objects are allocated from the per-p cache's free lists.
// large objects (> 32 kb) are allocated straight from the heap.
func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.pointer {
    ...
}

总结

数组在编译阶段最终被解析为types.array类型,包含元素类型elem和数组长度bound:

type array struct {
  elem  *type // element type
  bound int64 // number of elements; <0 if unknown yet
}
  • 如果数组长度未指定,例如使用了语法糖[...],则会在表达式类型检查时计算出数组长度。
  • 数组字面量初始化以及索引越界检查都是在编译时类型检查阶段完成的。
  • 在运行时通过newarray()函数对数组内存进行分配,如果数组大小超过32KB则会直接从堆上分配内存。

到此这篇关于golang数组内存分配原理的文章就介绍到这了,更多相关golang数组原理内容请搜索以前的文章或继续浏览下面的相关文章,希望大家以后多多支持!

《golang数组内存分配原理.doc》

下载本文的Word格式文档,以方便收藏与打印。