Python 解释器实现:内置数据类型
目录

Python内置数据类型:

  • 空值: None
  • 数字: bool, int, long, float, complex
  • 序列: str, unicode, list, tuple
  • 字典: dict
  • 集合: set/frozenset

对象头部

Python中一切皆是对象(PyObject),并且分配在heap中。 每个对象都包含对象头部(PyObject_HEAD),里面包括引用计数和type对象指针。 对象一旦在heap中分配,其内存地址和大小都不会变化。

基本类型头部:

// Include/object.h
/*
 * Common header placed at the start of every object: a reference count
 * followed by a pointer to the object's type object.
 *
 * The annotation on ob_refcnt must be a block comment. A `//` comment
 * before the line-continuation backslash would be spliced together with the
 * next line and swallow the ob_type member.
 */
#define PyObject_HEAD                   \
    Py_ssize_t ob_refcnt; /* ssize_t */ \
    struct _typeobject *ob_type;

/* Generic object: consists of nothing but the common object header. */
typedef struct _object {
PyObject_HEAD
} PyObject;

变长对象头部(long、str、list、tuple 等使用):

// Include/object.h
/* Header for variable-size objects: the common header followed by an item count. */
#define PyObject_VAR_HEAD               \
PyObject_HEAD                       \
Py_ssize_t ob_size; /* Number of items in variable part */

/* Generic variable-size object: consists of nothing but PyObject_VAR_HEAD. */
typedef struct {
PyObject_VAR_HEAD
} PyVarObject;

1. 基本类型

1.1 None

// Include/object.h
/* Py_None evaluates to the address of the global _Py_NoneStruct object. */
#define Py_None (&_Py_NoneStruct)

// Objects/object.c
/* The object behind Py_None. The initializer supplies the header fields in
   declaration order: reference count 1, then the type pointer &PyNone_Type. */
PyObject _Py_NoneStruct = {
1, &PyNone_Type
};

PyObject类型的静态全局变量,表示了None值。

1.2 整型 int

// Include/intobject.h
/* Python int object: the common header plus the value held in a C long. */
typedef struct {
PyObject_HEAD
long ob_ival;
} PyIntObject;

Int型的最大值:

// Include/pyport.h
/* Largest value of a C long where long is 64 bits wide: 2^63 - 1. */
#define LONG_MAX 0X7FFFFFFFFFFFFFFFL   // 64bit platform

// Objects/intobject.c
/* Return the largest value a PyIntObject can hold, i.e. LONG_MAX. */
long
PyInt_GetMax(void)
{
return LONG_MAX;            /* To initialize sys.maxint */
}

1.3 布尔型 bool

// Include/boolobject.h
/* A bool object has exactly the same layout as an int object. */
typedef PyIntObject PyBoolObject;

值得注意的是,Python中True和False的定义如下:

// Include/boolobject.h
/* Py_False and Py_True are the addresses of two global objects, cast to PyObject *. */
#define Py_False ((PyObject *) &_Py_ZeroStruct)
#define Py_True ((PyObject *) &_Py_TrueStruct)

// Objects/boolobject.c
/* The object behind Py_False: header initialized with &PyBool_Type, integer value 0. */
PyIntObject _Py_ZeroStruct = {
PyObject_HEAD_INIT(&PyBool_Type)
0
};

/* The object behind Py_True: header initialized with &PyBool_Type, integer value 1. */
PyIntObject _Py_TrueStruct = {
PyObject_HEAD_INIT(&PyBool_Type)
1
};

这两个PyIntObject类型的全局静态对象分别表示True和False两个布尔值。所以,在Python中:

>>> int(True)
1
>>> int(False)
0
>>> x = 5
>>> range(10)[x > 3]
1

但是True和False头部的类型指针指向的是PyBool_Type,而不是PyInt_Type。

1.4 长整型 long

// Include/longintrepr.h
/* digit is the element type of PyLongObject's ob_digit array. Its C type is
   selected by PYLONG_BITS_IN_DIGIT, which must be 30 or 15; any other value
   is a compile-time error. */
#if PYLONG_BITS_IN_DIGIT == 30
typedef PY_UINT32_T digit;
...
#elif PYLONG_BITS_IN_DIGIT == 15
typedef unsigned short digit;
#else
#error "PYLONG_BITS_IN_DIGIT should be 15 or 30"
#endif

/* Python long object: a variable-size header followed by an array of digits.
   NOTE(review): ob_digit is declared with one element; presumably the object
   is over-allocated and ob_size carries the real digit count - confirm in
   Objects/longobject.c. */
struct _longobject {
PyObject_VAR_HEAD
digit ob_digit[1];
};
typedef struct _longobject PyLongObject;

1.5 浮点型 float

// Include/floatobject.h
/* Python float object: the common header plus the value held in a C double. */
typedef struct {
PyObject_HEAD
double ob_fval;
} PyFloatObject;

1.6 复数型 complex

// Include/complexobject.h
/* Plain C representation of a complex number: real and imaginary parts as doubles. */
typedef struct {
double real;
double imag;
} Py_complex;

/* Python complex object: the common header plus a Py_complex value. */
typedef struct {
PyObject_HEAD
Py_complex cval;
} PyComplexObject;

2. 容器类型

2.1 字符串类型 str

// Include/stringobject.h
/* Python str object: variable-size header, then ob_shash, ob_sstate and the
   character data in ob_sval.
   NOTE(review): ob_shash looks like a cached hash and ob_sstate like the
   interning state - confirm against Include/stringobject.h. ob_sval is
   declared with one element; presumably the real buffer extends past the end
   of the struct. */
typedef struct {
PyObject_VAR_HEAD
long ob_shash;
int ob_sstate;
char ob_sval[1];
} PyStringObject;

2.2 Unicode类型 unicode

// Include/unicodeobject.h

typedef PY_UNICODE_TYPE Py_UNICODE;  // 4 bytes; NOTE(review): depends on the build's PY_UNICODE_TYPE (UCS-2 vs UCS-4) - confirm

/* Python unicode object: the common header, the length of the character data,
   a pointer to the raw Py_UNICODE buffer, a hash slot (-1 while unset) and an
   optional encoded str version of the text. */
typedef struct {
PyObject_HEAD
Py_ssize_t length;          /* Length of raw Unicode data in buffer */
Py_UNICODE *str;            /* Raw Unicode buffer */
long hash;                  /* Hash value; -1 if not set */
PyObject *defenc;           /* (Default) Encoded version as Python
string, or NULL; this is used for
implementing the buffer protocol */
} PyUnicodeObject;

2.3 列表类型 list

// Include/listobject.h

/* Python list object: variable-size header, a pointer to an array of
   PyObject * items, and the `allocated` count.
   NOTE(review): `allocated` is presumably the capacity of ob_item, with
   ob_size the number of slots in use - confirm in Include/listobject.h. */
typedef struct {
PyObject_VAR_HEAD
PyObject **ob_item;
Py_ssize_t allocated;
} PyListObject;

2.4 元组类型 tuple

// Include/tupleobject.h
/* Python tuple object: variable-size header with the item pointers stored
   inline in ob_item.
   NOTE(review): ob_item is declared with one element; presumably the object
   is over-allocated to hold ob_size items - confirm in Objects/tupleobject.c. */
typedef struct {
PyObject_VAR_HEAD
PyObject *ob_item[1];
} PyTupleObject;

2.5 字典类型 dict

// Include/dictobject.h
/* One slot of a dict's entry table: a hash value plus key and value pointers.
   NOTE(review): me_hash is presumably the cached hash of me_key - confirm. */
typedef struct {
Py_ssize_t me_hash;
PyObject *me_key;
PyObject *me_value;
} PyDictEntry;

/* Python dict object: the common header, slot counters, a pointer to the
   entry table, a lookup function pointer taking (dict, key, hash) and
   returning an entry, and an embedded table of PyDict_MINSIZE entries.
   NOTE(review): presumably ma_table points at ma_smalltable while the dict is
   small, and ma_mask is the table size minus one - confirm in
   Include/dictobject.h. */
typedef struct _dictobject PyDictObject;
struct _dictobject {
PyObject_HEAD
Py_ssize_t ma_fill;  /* # Active + # Dummy */
Py_ssize_t ma_used;  /* # Active */

Py_ssize_t ma_mask;

PyDictEntry *ma_table;
PyDictEntry *(*ma_lookup)(PyDictObject *mp, PyObject *key, long hash);
PyDictEntry ma_smalltable[PyDict_MINSIZE];
};

2.6 集合类型 set/frozenset

// Include/setobject.h
/* One slot of a set's entry table: the key pointer and its cached hash code. */
typedef struct {
long hash;      /* cached hash code for the entry key */
PyObject *key;
} setentry;

/* Python set/frozenset object: the common header, slot counters, a pointer to
   the entry table, a lookup function pointer taking (set, key, hash) and
   returning an entry, an embedded table of PySet_MINSIZE entries, a hash slot
   used only by frozenset, and the weak-reference list.
   NOTE(review): presumably `table` points at `smalltable` while the set is
   small, and `mask` is the table size minus one - confirm in
   Include/setobject.h. */
typedef struct _setobject PySetObject;
struct _setobject {
PyObject_HEAD

Py_ssize_t fill;  /* # Active + # Dummy */
Py_ssize_t used;  /* # Active */

Py_ssize_t mask;

setentry *table;
setentry *(*lookup)(PySetObject *so, PyObject *key, long hash);
setentry smalltable[PySet_MINSIZE];

long hash;                  /* only used by frozenset objects */
PyObject *weakreflist;      /* List of weak references */
};

发表评论