# 第2篇:CPython实现原理:整数对象(前篇)

``````....
/* Long integer representation.
The absolute value of a number is equal to
SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
Negative numbers are represented with ob_size < 0;
zero is represented by ob_size == 0.
In a normalized number, ob_digit[abs(ob_size)-1] (the most significant
digit) is never zero.  Also, in all cases, for all valid i,
The allocation function takes care of allocating extra memory
so that ob_digit[0] ... ob_digit[abs(ob_size)-1] are actually available.

CAUTION:  Generic code manipulating subtypes of PyVarObject has to
aware that ints abuse  ob_size's sign bit.
*/
struct _longobject {
digit ob_digit[1];
};
....
typedef struct _longobject PyLongObject;
``````

``````....
/* The same struct with the object header written out as explicit fields. */
struct _longobject {
/* Embedded PyObject that every object starts with. */
PyObject ob_base;
Py_ssize_t ob_size; /* Number of items in variable part */
/* Digits of the magnitude; abs(ob_size) of them are in use and the sign
   of ob_size is the sign of the integer. */
digit ob_digit[1];
};
....
typedef struct _longobject PyLongObject;
``````

整数的绝对值 = $\sum_{i=0}^{|\mathrm{ob\_size}|-1} \mathrm{ob\_digit}[i] \cdot 2^{\,\mathrm{SHIFT}\cdot i}$

• 负数用ob_size <0表示；
• 0由ob_size == 0表示
• 在规范化（normalized）的整数中，ob_digit[abs(ob_size)-1]（最高有效位）永远不会为0。而且，在所有情况下，对于所有有效的i，都有 0 <= ob_digit[i] <= MASK（即代码中的PyLong_MASK）
• 内存分配函数负责分配额外的内存，使ob_digit[0]到ob_digit[abs(ob_size)-1]都实际可用。

• 把30位大小的数值存储在uint32_t类型的ob_digit数组中
• 把15位大小的数值存储在unsigned short类型的ob_digit数组中
``````
/* Pick the digit type and its companions from PYLONG_BITS_IN_DIGIT, which
   must be 30 or 15; any other value stops the build. */
#if PYLONG_BITS_IN_DIGIT == 30
/* 30-bit digits kept in 32-bit words, with 64-bit types for the
   "twodigits" variants. */
typedef uint32_t digit;
typedef int32_t sdigit; /* signed variant of digit */
typedef uint64_t twodigits;
typedef int64_t stwodigits; /* signed variant of twodigits */
#define PyLong_SHIFT    30
#define _PyLong_DECIMAL_SHIFT   9 /* max(e such that 10**e fits in a digit) */
#define _PyLong_DECIMAL_BASE    ((digit)1000000000) /* 10 ** DECIMAL_SHIFT */
#elif PYLONG_BITS_IN_DIGIT == 15
/* 15-bit digits kept in unsigned short, with long-based types for the
   "twodigits" variants. */
typedef unsigned short digit;
typedef short sdigit; /* signed variant of digit */
typedef unsigned long twodigits;
typedef long stwodigits; /* signed variant of twodigits */
#define PyLong_SHIFT    15
#define _PyLong_DECIMAL_SHIFT   4 /* max(e such that 10**e fits in a digit) */
#define _PyLong_DECIMAL_BASE    ((digit)10000) /* 10 ** DECIMAL_SHIFT */
#else
#error "PYLONG_BITS_IN_DIGIT should be 15 or 30"
#endif
/* The radix of the representation: 2 ** PyLong_SHIFT. */
#define PyLong_BASE     ((digit)1 << PyLong_SHIFT)

/* Build-time guard: both supported shifts (15 and 30) are multiples of 5. */
#if PyLong_SHIFT % 5 != 0
#error "longobject.c requires that PyLong_SHIFT be divisible by 5"
#endif
``````

``````PyObject *
PyLong_FromLong(long ival)
{
/* Create an int object holding the value of the C long `ival`.
   Returns the object cast to PyObject *, or NULL when _PyLong_New
   returned NULL.  Values for which IS_SMALL_INT holds are served by
   get_small_int instead of a fresh allocation. */
PyLongObject *v;
unsigned long abs_ival;
unsigned long t;  /* unsigned so >> doesn't propagate sign bit */
int ndigits = 0;
int sign;

if (IS_SMALL_INT(ival)) {
return get_small_int((sdigit)ival);
}

/* Split the value into a magnitude (abs_ival) and a sign of -1, 0 or 1. */
if (ival < 0) {
/* negate: can't write this as abs_ival = -ival since that
invokes undefined behaviour when ival is LONG_MIN */
abs_ival = 0U-(unsigned long)ival;
sign = -1;
}
else {
abs_ival = (unsigned long)ival;
sign = ival == 0 ? 0 : 1;
}

/* Fast path for single-digit ints */
if (!(abs_ival >> PyLong_SHIFT)) {
v = _PyLong_New(1);
if (v) {
/* The size field doubles as the sign: -1, 0 or 1 for one digit. */
Py_SET_SIZE(v, sign);
v->ob_digit[0] = Py_SAFE_DOWNCAST(
abs_ival, unsigned long, digit);
}
return (PyObject*)v;
}

#if PyLong_SHIFT==15
/* 2 digits */
if (!(abs_ival >> 2*PyLong_SHIFT)) {
v = _PyLong_New(2);
if (v) {
Py_SET_SIZE(v, 2 * sign);
/* Low 15 bits go in digit 0, the remaining bits in digit 1. */
v->ob_digit[0] = Py_SAFE_DOWNCAST(
abs_ival & PyLong_MASK, unsigned long, digit);
v->ob_digit[1] = Py_SAFE_DOWNCAST(
abs_ival >> PyLong_SHIFT, unsigned long, digit);
}
return (PyObject*)v;
}
#endif

/* Larger numbers: loop to determine number of digits */
t = abs_ival;
while (t) {
++ndigits;
t >>= PyLong_SHIFT;
}
v = _PyLong_New(ndigits);
if (v != NULL) {
digit *p = v->ob_digit;
/* Signed digit count: negative for negative values. */
Py_SET_SIZE(v, ndigits * sign);
/* Second pass over the magnitude: store digits least significant first. */
t = abs_ival;
while (t) {
*p++ = Py_SAFE_DOWNCAST(
t & PyLong_MASK, unsigned long, digit);
t >>= PyLong_SHIFT;
}
}
return (PyObject *)v;
}
``````

### 整数对象的初始化

``````PyTypeObject PyLong_Type = {
"int",                                      /* tp_name */
offsetof(PyLongObject, ob_digit),           /* tp_basicsize */
sizeof(digit),                              /* tp_itemsize */
....
long_to_decimal_string,                     /* tp_repr */
&long_as_number,                            /* tp_as_number */
....
(hashfunc)long_hash,                        /* tp_hash */
....
PyObject_GenericGetAttr,                    /* tp_getattro */
....
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
Py_TPFLAGS_LONG_SUBCLASS,               /* tp_flags */
long_doc,                                   /* tp_doc */
....
long_richcompare,                           /* tp_richcompare */
....
long_methods,                               /* tp_methods */
0,                                          /* tp_members */
long_getset,                                /* tp_getset */
....
long_new,                                   /* tp_new */
PyObject_Del,                               /* tp_free */
};
``````

``````static PyObject *
long_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
/* Argument-unpacking front end for long_new_impl: pulls the optional
   first argument and the optional "base" argument out of args/kwargs and
   forwards them.  Returns long_new_impl's result, or NULL when the
   unpacking call returned NULL. */
PyObject *return_value = NULL;
/* NOTE(review): the empty first name presumably marks that parameter as
   positional-only; confirm against the argument parser's documentation. */
static const char * const _keywords[] = {"", "base", NULL};
static _PyArg_Parser _parser = {NULL, _keywords, "int", 0};
PyObject *argsbuf[2];
PyObject * const *fastargs;
Py_ssize_t nargs = PyTuple_GET_SIZE(args);
/* Number of arguments actually supplied, positional plus keyword. */
Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0;
PyObject *x = NULL;
PyObject *obase = NULL;

fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 0, 2, 0, argsbuf);
if (!fastargs) {
goto exit;
}
/* No positional argument: x stays NULL. */
if (nargs < 1) {
goto skip_optional_posonly;
}
/* x uses up one of the supplied arguments. */
noptargs--;
x = fastargs[0];
skip_optional_posonly:
/* Nothing left over means base was not given: obase stays NULL. */
if (!noptargs) {
goto skip_optional_pos;
}
obase = fastargs[1];
skip_optional_pos:
return_value = long_new_impl(type, x, obase);

exit:
return return_value;
}
``````

``````static PyObject *
long_new_impl(PyTypeObject *type, PyObject *x, PyObject *obase)
/*[clinic end generated code: output=e47cfe777ab0f24c input=81c98f418af9eb6f]*/
{
/* Build the int for int(x, base).  x and obase are NULL when the caller
   omitted them.  Returns the new object, or NULL after setting
   TypeError/ValueError for the invalid combinations handled below. */
Py_ssize_t base;

if (type != &PyLong_Type)
return long_subtype_new(type, x, obase); /* Wimp out */
// x omitted: a base without x is a TypeError; with neither argument the result is PyLong_FromLong(0)
if (x == NULL) {
if (obase != NULL) {
PyErr_SetString(PyExc_TypeError,
"int() missing string argument");
return NULL;
}
return PyLong_FromLong(0L);
}
// x given without a base: delegate to PyNumber_Long
if (obase == NULL)
return PyNumber_Long(x);

base = PyNumber_AsSsize_t(obase, NULL);
/* -1 is only an error if an exception is actually pending. */
if (base == -1 && PyErr_Occurred())
return NULL;
// base must be 0 or an integer in the range [2, 36]
if ((base != 0 && base < 2) || base > 36) {
PyErr_SetString(PyExc_ValueError,
"int() base must be >= 2 and <= 36, or 0");
return NULL;
}

/* With an explicit base, only str, bytearray and bytes are accepted. */
if (PyUnicode_Check(x))
return PyLong_FromUnicodeObject(x, (int)base);
else if (PyByteArray_Check(x) || PyBytes_Check(x)) {
const char *string;
if (PyByteArray_Check(x))
string = PyByteArray_AS_STRING(x);
else
string = PyBytes_AS_STRING(x);
return _PyLong_FromBytes(string, Py_SIZE(x), (int)base);
}
else {
PyErr_SetString(PyExc_TypeError,
"int() can't convert non-string with explicit base");
return NULL;
}
}
``````

调用链：int(...) → PyLong_Type（tp_new 槽）→ long_new → long_new_impl → 返回 PyObject *

C底层long_new_impl函数很大程度上反映了Python层面的类int在实例化时的行为。因为针对参数x的不同情况，long_new_impl根据相应的条件去调用PyLong_前缀的函数族中对应的函数来实例化PyLongObject

### 小型整数

``````// Located in Objects/longobject.c
#define NSMALLPOSINTS           _PY_NSMALLPOSINTS
#define NSMALLNEGINTS           _PY_NSMALLNEGINTS
....

// Located in Include/internal/pycore_interp.h

// Exclusive upper bound of the small-int range, so the largest cached value is 256
#define _PY_NSMALLPOSINTS           257
// Number of cached negative values, so the inclusive lower bound is -5
#define _PY_NSMALLNEGINTS           5

// The PyInterpreterState typedef is in Include/pystate.h.
// Excerpt of the interpreter state; "....." stands for the fields left out here.
struct _is {
.....
#if _PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS > 0
/* Small integers are preallocated in this array so that they
can be shared.
The integers that are preallocated are those in the range
-_PY_NSMALLNEGINTS (inclusive) to _PY_NSMALLPOSINTS (not inclusive).
*/
/* One slot per cached value, lowest value first. */
PyLongObject* small_ints[_PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS];
#endif
};

// Located in Objects/longobject.c
// The small-int helpers exist only when the cache has at least one slot.
#if NSMALLNEGINTS + NSMALLPOSINTS > 0

// True when ival lies in [-NSMALLNEGINTS, NSMALLPOSINTS)
#define IS_SMALL_INT(ival) (-NSMALLNEGINTS <= (ival) && (ival) < NSMALLPOSINTS)
// Variant that checks only the upper bound
#define IS_SMALL_UINT(ival) ((ival) < NSMALLPOSINTS)
``````

``````int
{
#if NSMALLNEGINTS + NSMALLPOSINTS > 0
for (Py_ssize_t i=0; i < NSMALLNEGINTS + NSMALLPOSINTS; i++) {
sdigit ival = (sdigit)i - NSMALLNEGINTS;
int size = (ival < 0) ? -1 : ((ival == 0) ? 0 : 1);

PyLongObject *v = _PyLong_New(1);
if (!v) {
return -1;
}

Py_SET_SIZE(v, size);
v->ob_digit[0] = (digit)abs(ival);

tstate->interp->small_ints[i] = v;
}
#endif

if (_Py_IsMainInterpreter(tstate)) {
_PyLong_Zero = PyLong_FromLong(0);
if (_PyLong_Zero == NULL) {
return 0;
}

_PyLong_One = PyLong_FromLong(1);
if (_PyLong_One == NULL) {
return 0;
}

/* initialize int_info */
if (Int_InfoType.tp_name == NULL) {
if (PyStructSequence_InitType2(&Int_InfoType, &int_info_desc) < 0) {
return 0;
}
}
}

return 1;
}
``````

• 因为Cython语法声明的变量，都是原始的C级别的数据类型，它们默认是基于C运行时系统的栈，而非CPython的数据栈或堆。
• C运行时的栈(stack)提供了原始级别的数据访问，因此存取速度会比基于堆、或CPython内部stack指针构建的数据栈快得多，差距往往可达一到两个数量级。
• 再者，Cython语法声明的整数对象在编译后，其初始化的时间开销是恒定的O(1)；Python实例化一个PyLongObject虽然通常也是常数时间，但常数因子要大得多，因为实例化一个PyLongObject的Python语句等价于CPython内部执行5-6个指令码，我们知道每执行一个指令码都要经过一次Python的解释循环，并执行其中内部C函数。还没有算上CPython运行时栈和堆的开销呢！！

``````.....
/* Return the cached object for the small integer `ival`, after
   incrementing its reference count.  `ival` must satisfy IS_SMALL_INT. */
static PyObject *
get_small_int(sdigit ival)
{
assert(IS_SMALL_INT(ival));
/* The cache is reached through the current thread state's interpreter. */
PyThreadState *tstate = _PyThreadState_GET();
/* Slot 0 holds -NSMALLNEGINTS, so offset the value to get its index. */
PyObject *v = (PyObject*)tstate->interp->small_ints[ival + NSMALLNEGINTS];
Py_INCREF(v);
return v;
}
....
/* Swap a freshly built int for the shared cached object when possible.
   If v is non-NULL, has at most one digit and its value passes
   IS_SMALL_INT, the reference to v is released and the cached object is
   returned; otherwise v is returned unchanged (including NULL). */
static PyLongObject *
maybe_small_long(PyLongObject *v)
{
if (v && Py_ABS(Py_SIZE(v)) <= 1) {
/* NOTE(review): MEDIUM_VALUE is defined elsewhere; presumably it yields
   the signed value of an int with at most one digit. */
sdigit ival = MEDIUM_VALUE(v);
if (IS_SMALL_INT(ival)) {
Py_DECREF(v);
return (PyLongObject *)get_small_int(ival);
}
}
return v;
}
#else
/* Fallbacks for a build without a small-int cache: the range tests are
   always false, get_small_int must never be reached, and
   maybe_small_long passes its argument through untouched.
   NOTE(review): the matching #if is presumably the
   "NSMALLNEGINTS + NSMALLPOSINTS > 0" test shown in the earlier listing. */
#define IS_SMALL_INT(ival) 0
#define IS_SMALL_UINT(ival) 0
#define get_small_int(ival) (Py_UNREACHABLE(), NULL)
#define maybe_small_long(val) (val)
#endif
``````