Skip to content
This repository was archived by the owner on Jul 5, 2023. It is now read-only.

Implement underscores in numeric literals #21

Merged
merged 5 commits into from
Oct 31, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions ast35/Include/Python-ast.h
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@ struct _expr {

struct {
object n;
int contains_underscores;
} Num;

struct {
Expand Down Expand Up @@ -570,8 +571,9 @@ expr_ty _Ta35_Compare(expr_ty left, asdl_int_seq * ops, asdl_seq * comparators,
#define Call(a0, a1, a2, a3, a4, a5) _Ta35_Call(a0, a1, a2, a3, a4, a5)
expr_ty _Ta35_Call(expr_ty func, asdl_seq * args, asdl_seq * keywords, int
lineno, int col_offset, PyArena *arena);
#define Num(a0, a1, a2, a3) _Ta35_Num(a0, a1, a2, a3)
expr_ty _Ta35_Num(object n, int lineno, int col_offset, PyArena *arena);
#define Num(a0, a1, a2, a3, a4) _Ta35_Num(a0, a1, a2, a3, a4)
expr_ty _Ta35_Num(object n, int contains_underscores, int lineno, int
col_offset, PyArena *arena);
#define Str(a0, a1, a2, a3) _Ta35_Str(a0, a1, a2, a3)
expr_ty _Ta35_Str(string s, int lineno, int col_offset, PyArena *arena);
#define Bytes(a0, a1, a2, a3) _Ta35_Bytes(a0, a1, a2, a3)
Expand Down
4 changes: 3 additions & 1 deletion ast35/Parser/Python.asdl
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,9 @@ module Python
-- x < 4 < 3 and (x < 4) < 3
| Compare(expr left, cmpop* ops, expr* comparators)
| Call(expr func, expr* args, keyword* keywords)
| Num(object n) -- a number as a PyObject.
-- contains_underscores is not part of standard Python ASDL
-- and exists here to signal that a Python 3.6 feature was used
| Num(object n, int? contains_underscores) -- a number as a PyObject.
| Str(string s) -- need to specify raw, unicode, etc?
| Bytes(bytes s)
| NameConstant(singleton value)
Expand Down
129 changes: 90 additions & 39 deletions ast35/Parser/tokenizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -1343,6 +1343,27 @@ verify_identifier(struct tok_state *tok)
}
#endif

/* Consume the rest of a decimal digit run in which single underscores may
   separate digits.  The visible callers invoke this after they have already
   read a digit.

   Returns the first character read that does not belong to the run (it is
   NOT pushed back).  If an underscore is not immediately followed by a
   digit, sets tok->done to E_TOKEN, pushes the offending character back and
   returns 0. */
static int
tok_decimal_tail(struct tok_state *tok)
{
    int ch;

    for (;;) {
        /* Swallow consecutive digits; ch ends up as the first non-digit. */
        for (ch = tok_nextc(tok); isdigit(ch); ch = tok_nextc(tok)) {
        }
        if (ch != '_') {
            return ch;
        }
        /* A separator is only valid when a digit follows it directly. */
        ch = tok_nextc(tok);
        if (isdigit(ch)) {
            continue;
        }
        tok->done = E_TOKEN;
        tok_backup(tok, ch);
        return 0;
    }
}

/* Get next token, after space stripping etc. */

static int
Expand Down Expand Up @@ -1644,64 +1665,88 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
if (c == '0') {
/* Hex, octal or binary -- maybe. */
c = tok_nextc(tok);
if (c == '.')
goto fraction;
if (c == 'j' || c == 'J')
goto imaginary;
if (c == 'x' || c == 'X') {

/* Hex */
c = tok_nextc(tok);
if (!isxdigit(c)) {
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
}
do {
c = tok_nextc(tok);
} while (isxdigit(c));
if (c == '_')
c = tok_nextc(tok);
if (!isxdigit(c)) {
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
}
do {
c = tok_nextc(tok);
} while (isxdigit(c));
} while (c == '_');
}
else if (c == 'o' || c == 'O') {
/* Octal */
c = tok_nextc(tok);
if (c < '0' || c >= '8') {
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
}
do {
c = tok_nextc(tok);
} while ('0' <= c && c < '8');
if (c == '_')
c = tok_nextc(tok);
if (c < '0' || c >= '8') {
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
}
do {
c = tok_nextc(tok);
} while ('0' <= c && c < '8');
} while (c == '_');
}
else if (c == 'b' || c == 'B') {
/* Binary */
c = tok_nextc(tok);
if (c != '0' && c != '1') {
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
}
do {
c = tok_nextc(tok);
} while (c == '0' || c == '1');
if (c == '_')
c = tok_nextc(tok);
if (c != '0' && c != '1') {
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
}
do {
c = tok_nextc(tok);
} while (c == '0' || c == '1');
} while (c == '_');
}
else {
int nonzero = 0;
/* maybe old-style octal; c is first char of it */
/* in any case, allow '0' as a literal */
while (c == '0')
while (1) {
if (c == '_') {
c = tok_nextc(tok);
if (!isdigit(c)) {
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
}
}
if (c != '0')
break;
c = tok_nextc(tok);
while (isdigit(c)) {
}
if (isdigit(c)) {
nonzero = 1;
c = tok_nextc(tok);
c = tok_decimal_tail(tok);
if (c == 0) {
return ERRORTOKEN;
}
}
if (c == '.')
if (c == '.') {
c = tok_nextc(tok);
goto fraction;
}
else if (c == 'e' || c == 'E')
goto exponent;
else if (c == 'j' || c == 'J')
goto imaginary;
else if (nonzero) {
/* Old-style octal: now disallowed. */
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
Expand All @@ -1710,17 +1755,22 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
}
else {
/* Decimal */
do {
c = tok_nextc(tok);
} while (isdigit(c));
c = tok_decimal_tail(tok);
if (c == 0) {
return ERRORTOKEN;
}
{
/* Accept floating point numbers. */
if (c == '.') {
c = tok_nextc(tok);
fraction:
/* Fraction */
do {
c = tok_nextc(tok);
} while (isdigit(c));
if (isdigit(c)) {
c = tok_decimal_tail(tok);
if (c == 0) {
return ERRORTOKEN;
}
}
}
if (c == 'e' || c == 'E') {
int e;
Expand All @@ -1742,9 +1792,10 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
*p_end = tok->cur;
return NUMBER;
}
do {
c = tok_nextc(tok);
} while (isdigit(c));
c = tok_decimal_tail(tok);
if (c == 0) {
return ERRORTOKEN;
}
}
if (c == 'j' || c == 'J')
/* Imaginary part */
Expand Down
27 changes: 24 additions & 3 deletions ast35/Python/Python-ast.c
Original file line number Diff line number Diff line change
Expand Up @@ -293,8 +293,10 @@ static char *Call_fields[]={
};
static PyTypeObject *Num_type;
_Py_IDENTIFIER(n);
_Py_IDENTIFIER(contains_underscores);
/* Field names of the Num node.  make_type() for "Num" is called with this
   array and its entry count (2): "n" holds the number object and
   "contains_underscores" is the flag stored alongside it. */
static char *Num_fields[]={
"n",
"contains_underscores",
};
static PyTypeObject *Str_type;
_Py_IDENTIFIER(s);
Expand Down Expand Up @@ -937,7 +939,7 @@ static int init_types(void)
if (!Compare_type) return 0;
Call_type = make_type("Call", expr_type, Call_fields, 3);
if (!Call_type) return 0;
Num_type = make_type("Num", expr_type, Num_fields, 1);
Num_type = make_type("Num", expr_type, Num_fields, 2);
if (!Num_type) return 0;
Str_type = make_type("Str", expr_type, Str_fields, 1);
if (!Str_type) return 0;
Expand Down Expand Up @@ -2077,7 +2079,8 @@ Call(expr_ty func, asdl_seq * args, asdl_seq * keywords, int lineno, int
}

expr_ty
Num(object n, int lineno, int col_offset, PyArena *arena)
Num(object n, int contains_underscores, int lineno, int col_offset, PyArena
*arena)
{
expr_ty p;
if (!n) {
Expand All @@ -2090,6 +2093,7 @@ Num(object n, int lineno, int col_offset, PyArena *arena)
return NULL;
p->kind = Num_kind;
p->v.Num.n = n;
p->v.Num.contains_underscores = contains_underscores;
p->lineno = lineno;
p->col_offset = col_offset;
return p;
Expand Down Expand Up @@ -3267,6 +3271,12 @@ ast2obj_expr(void* _o)
if (_PyObject_SetAttrId(result, &PyId_n, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_int(o->v.Num.contains_underscores);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_contains_underscores, value) ==
-1)
goto failed;
Py_DECREF(value);
break;
case Str_kind:
result = PyType_GenericNew(Str_type, NULL, NULL);
Expand Down Expand Up @@ -6267,6 +6277,7 @@ obj2ast_expr(PyObject* obj, expr_ty* out, PyArena* arena)
}
if (isinstance) {
object n;
int contains_underscores;

if (_PyObject_HasAttrId(obj, &PyId_n)) {
int res;
Expand All @@ -6279,7 +6290,17 @@ obj2ast_expr(PyObject* obj, expr_ty* out, PyArena* arena)
PyErr_SetString(PyExc_TypeError, "required field \"n\" missing from Num");
return 1;
}
*out = Num(n, lineno, col_offset, arena);
if (exists_not_none(obj, &PyId_contains_underscores)) {
int res;
tmp = _PyObject_GetAttrId(obj, &PyId_contains_underscores);
if (tmp == NULL) goto failed;
res = obj2ast_int(tmp, &contains_underscores, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
} else {
contains_underscores = 0;
}
*out = Num(n, contains_underscores, lineno, col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
Expand Down
34 changes: 31 additions & 3 deletions ast35/Python/ast.c
Original file line number Diff line number Diff line change
Expand Up @@ -2204,15 +2204,18 @@ ast_for_atom(struct compiling *c, const node *n)
return Str(str, LINENO(n), n->n_col_offset, c->c_arena);
}
case NUMBER: {
PyObject *pynum = parsenumber(c, STR(ch));
const char *s = STR(ch);
int contains_underscores = strchr(s, '_') != NULL;
PyObject *pynum = parsenumber(c, s);
if (!pynum)
return NULL;

if (PyArena_AddPyObject(c->c_arena, pynum) < 0) {
Py_DECREF(pynum);
return NULL;
}
return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena);
return Num(pynum, contains_underscores, LINENO(n),
n->n_col_offset, c->c_arena);
}
case ELLIPSIS: /* Ellipsis */
return Ellipsis(LINENO(n), n->n_col_offset, c->c_arena);
Expand Down Expand Up @@ -4124,7 +4127,7 @@ ast_for_stmt(struct compiling *c, const node *n)
}

static PyObject *
parsenumber(struct compiling *c, const char *s)
parsenumber_raw(struct compiling *c, const char *s)
{
const char *end;
long x;
Expand Down Expand Up @@ -4166,6 +4169,31 @@ parsenumber(struct compiling *c, const char *s)
}
}

/* Parse the text of a numeric literal into a Python number object.

   Literals without underscores are passed straight to parsenumber_raw().
   Otherwise a temporary copy with every '_' removed is built and parsed
   instead; the copy is released before returning.

   Returns whatever parsenumber_raw() returns, or NULL with MemoryError set
   if the temporary copy cannot be allocated. */
static PyObject *
parsenumber(struct compiling *c, const char *s)
{
    char *dup, *end;
    PyObject *res = NULL;

    assert(s != NULL);

    if (strchr(s, '_') == NULL) {
        return parsenumber_raw(c, s);
    }
    /* Create a duplicate without underscores.  The copy can never be
       longer than the original, so strlen(s) + 1 bytes always suffice. */
    dup = PyMem_Malloc(strlen(s) + 1);
    if (dup == NULL) {
        /* Without this check the copy loop below would write through a
           NULL pointer when the allocation fails. */
        return PyErr_NoMemory();
    }
    end = dup;
    for (; *s; s++) {
        if (*s != '_') {
            *end++ = *s;
        }
    }
    *end = '\0';
    res = parsenumber_raw(c, dup);
    PyMem_Free(dup);
    return res;
}

static PyObject *
decode_utf8(struct compiling *c, const char **sPtr, const char *end)
{
Expand Down