1272 lines
33 KiB
C
1272 lines
33 KiB
C
/* strop module */
|
|
|
|
#define PY_SSIZE_T_CLEAN
|
|
#include "Python.h"
|
|
#include <ctype.h>
|
|
|
|
PyDoc_STRVAR(strop_module__doc__,
|
|
"Common string manipulations, optimized for speed.\n"
|
|
"\n"
|
|
"Always use \"import string\" rather than referencing\n"
|
|
"this module directly.");
|
|
|
|
/* XXX This file assumes that the <ctype.h> is*() functions
|
|
XXX are defined for all 8-bit characters! */
|
|
|
|
/* Issue the module-wide DeprecationWarning.  If PyErr_Warn() reports
   failure (non-zero), the enclosing function returns NULL.

   The body is wrapped in do { } while (0) so that "WARN;" expands to exactly
   one statement and can never capture an "else" that follows it. */
#define WARN \
    do { \
        if (PyErr_Warn(PyExc_DeprecationWarning, \
                       "strop functions are obsolete; use string methods")) \
            return NULL; \
    } while (0)
|
|
|
|
/* lstrip(), rstrip() and strip() are all implemented by do_strip(); the
   selector below tells it which end(s) of the string to trim. */
enum {
    LEFTSTRIP = 0,
    RIGHTSTRIP = 1,
    BOTHSTRIP = 2
};
|
|
|
|
|
|
static PyObject *
|
|
split_whitespace(char *s, Py_ssize_t len, Py_ssize_t maxsplit)
|
|
{
|
|
Py_ssize_t i = 0, j;
|
|
int err;
|
|
Py_ssize_t countsplit = 0;
|
|
PyObject* item;
|
|
PyObject *list = PyList_New(0);
|
|
|
|
if (list == NULL)
|
|
return NULL;
|
|
|
|
while (i < len) {
|
|
while (i < len && isspace(Py_CHARMASK(s[i]))) {
|
|
i = i+1;
|
|
}
|
|
j = i;
|
|
while (i < len && !isspace(Py_CHARMASK(s[i]))) {
|
|
i = i+1;
|
|
}
|
|
if (j < i) {
|
|
item = PyString_FromStringAndSize(s+j, i-j);
|
|
if (item == NULL)
|
|
goto finally;
|
|
|
|
err = PyList_Append(list, item);
|
|
Py_DECREF(item);
|
|
if (err < 0)
|
|
goto finally;
|
|
|
|
countsplit++;
|
|
while (i < len && isspace(Py_CHARMASK(s[i]))) {
|
|
i = i+1;
|
|
}
|
|
if (maxsplit && (countsplit >= maxsplit) && i < len) {
|
|
item = PyString_FromStringAndSize(
|
|
s+i, len - i);
|
|
if (item == NULL)
|
|
goto finally;
|
|
|
|
err = PyList_Append(list, item);
|
|
Py_DECREF(item);
|
|
if (err < 0)
|
|
goto finally;
|
|
|
|
i = len;
|
|
}
|
|
}
|
|
}
|
|
return list;
|
|
finally:
|
|
Py_DECREF(list);
|
|
return NULL;
|
|
}
|
|
|
|
|
|
PyDoc_STRVAR(splitfields__doc__,
|
|
"split(s [,sep [,maxsplit]]) -> list of strings\n"
|
|
"splitfields(s [,sep [,maxsplit]]) -> list of strings\n"
|
|
"\n"
|
|
"Return a list of the words in the string s, using sep as the\n"
|
|
"delimiter string. If maxsplit is nonzero, splits into at most\n"
|
|
"maxsplit words. If sep is not specified, any whitespace string\n"
|
|
"is a separator. Maxsplit defaults to 0.\n"
|
|
"\n"
|
|
"(split and splitfields are synonymous)");
|
|
|
|
static PyObject *
|
|
strop_splitfields(PyObject *self, PyObject *args)
|
|
{
|
|
Py_ssize_t len, n, i, j, err;
|
|
Py_ssize_t splitcount, maxsplit;
|
|
char *s, *sub;
|
|
PyObject *list, *item;
|
|
|
|
WARN;
|
|
sub = NULL;
|
|
n = 0;
|
|
splitcount = 0;
|
|
maxsplit = 0;
|
|
if (!PyArg_ParseTuple(args, "t#|z#n:split", &s, &len, &sub, &n, &maxsplit))
|
|
return NULL;
|
|
if (sub == NULL)
|
|
return split_whitespace(s, len, maxsplit);
|
|
if (n == 0) {
|
|
PyErr_SetString(PyExc_ValueError, "empty separator");
|
|
return NULL;
|
|
}
|
|
|
|
list = PyList_New(0);
|
|
if (list == NULL)
|
|
return NULL;
|
|
|
|
i = j = 0;
|
|
while (i+n <= len) {
|
|
if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
|
|
item = PyString_FromStringAndSize(s+j, i-j);
|
|
if (item == NULL)
|
|
goto fail;
|
|
err = PyList_Append(list, item);
|
|
Py_DECREF(item);
|
|
if (err < 0)
|
|
goto fail;
|
|
i = j = i + n;
|
|
splitcount++;
|
|
if (maxsplit && (splitcount >= maxsplit))
|
|
break;
|
|
}
|
|
else
|
|
i++;
|
|
}
|
|
item = PyString_FromStringAndSize(s+j, len-j);
|
|
if (item == NULL)
|
|
goto fail;
|
|
err = PyList_Append(list, item);
|
|
Py_DECREF(item);
|
|
if (err < 0)
|
|
goto fail;
|
|
|
|
return list;
|
|
|
|
fail:
|
|
Py_DECREF(list);
|
|
return NULL;
|
|
}
|
|
|
|
|
|
PyDoc_STRVAR(joinfields__doc__,
|
|
"join(list [,sep]) -> string\n"
|
|
"joinfields(list [,sep]) -> string\n"
|
|
"\n"
|
|
"Return a string composed of the words in list, with\n"
|
|
"intervening occurrences of sep. Sep defaults to a single\n"
|
|
"space.\n"
|
|
"\n"
|
|
"(join and joinfields are synonymous)");
|
|
|
|
static PyObject *
|
|
strop_joinfields(PyObject *self, PyObject *args)
|
|
{
|
|
PyObject *seq;
|
|
char *sep = NULL;
|
|
Py_ssize_t seqlen, seplen = 0;
|
|
Py_ssize_t i, reslen = 0, slen = 0, sz = 100;
|
|
PyObject *res = NULL;
|
|
char* p = NULL;
|
|
ssizeargfunc getitemfunc;
|
|
|
|
WARN;
|
|
if (!PyArg_ParseTuple(args, "O|t#:join", &seq, &sep, &seplen))
|
|
return NULL;
|
|
if (sep == NULL) {
|
|
sep = " ";
|
|
seplen = 1;
|
|
}
|
|
|
|
seqlen = PySequence_Size(seq);
|
|
if (seqlen < 0 && PyErr_Occurred())
|
|
return NULL;
|
|
|
|
if (seqlen == 1) {
|
|
/* Optimization if there's only one item */
|
|
PyObject *item = PySequence_GetItem(seq, 0);
|
|
if (item && !PyString_Check(item)) {
|
|
PyErr_SetString(PyExc_TypeError,
|
|
"first argument must be sequence of strings");
|
|
Py_DECREF(item);
|
|
return NULL;
|
|
}
|
|
return item;
|
|
}
|
|
|
|
if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
|
|
return NULL;
|
|
p = PyString_AsString(res);
|
|
|
|
/* optimize for lists, since it's the most common case. all others
|
|
* (tuples and arbitrary sequences) just use the sequence abstract
|
|
* interface.
|
|
*/
|
|
if (PyList_Check(seq)) {
|
|
for (i = 0; i < seqlen; i++) {
|
|
PyObject *item = PyList_GET_ITEM(seq, i);
|
|
if (!PyString_Check(item)) {
|
|
PyErr_SetString(PyExc_TypeError,
|
|
"first argument must be sequence of strings");
|
|
Py_DECREF(res);
|
|
return NULL;
|
|
}
|
|
slen = PyString_GET_SIZE(item);
|
|
if (slen > PY_SSIZE_T_MAX - reslen ||
|
|
seplen > PY_SSIZE_T_MAX - reslen - seplen) {
|
|
PyErr_SetString(PyExc_OverflowError,
|
|
"input too long");
|
|
Py_DECREF(res);
|
|
return NULL;
|
|
}
|
|
while (reslen + slen + seplen >= sz) {
|
|
if (_PyString_Resize(&res, sz * 2) < 0)
|
|
return NULL;
|
|
sz *= 2;
|
|
p = PyString_AsString(res) + reslen;
|
|
}
|
|
if (i > 0) {
|
|
memcpy(p, sep, seplen);
|
|
p += seplen;
|
|
reslen += seplen;
|
|
}
|
|
memcpy(p, PyString_AS_STRING(item), slen);
|
|
p += slen;
|
|
reslen += slen;
|
|
}
|
|
_PyString_Resize(&res, reslen);
|
|
return res;
|
|
}
|
|
|
|
if (seq->ob_type->tp_as_sequence == NULL ||
|
|
(getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
|
|
{
|
|
PyErr_SetString(PyExc_TypeError,
|
|
"first argument must be a sequence");
|
|
return NULL;
|
|
}
|
|
/* This is now type safe */
|
|
for (i = 0; i < seqlen; i++) {
|
|
PyObject *item = getitemfunc(seq, i);
|
|
if (!item || !PyString_Check(item)) {
|
|
PyErr_SetString(PyExc_TypeError,
|
|
"first argument must be sequence of strings");
|
|
Py_DECREF(res);
|
|
Py_XDECREF(item);
|
|
return NULL;
|
|
}
|
|
slen = PyString_GET_SIZE(item);
|
|
if (slen > PY_SSIZE_T_MAX - reslen ||
|
|
seplen > PY_SSIZE_T_MAX - reslen - seplen) {
|
|
PyErr_SetString(PyExc_OverflowError,
|
|
"input too long");
|
|
Py_DECREF(res);
|
|
Py_XDECREF(item);
|
|
return NULL;
|
|
}
|
|
while (reslen + slen + seplen >= sz) {
|
|
if (_PyString_Resize(&res, sz * 2) < 0) {
|
|
Py_DECREF(item);
|
|
return NULL;
|
|
}
|
|
sz *= 2;
|
|
p = PyString_AsString(res) + reslen;
|
|
}
|
|
if (i > 0) {
|
|
memcpy(p, sep, seplen);
|
|
p += seplen;
|
|
reslen += seplen;
|
|
}
|
|
memcpy(p, PyString_AS_STRING(item), slen);
|
|
p += slen;
|
|
reslen += slen;
|
|
Py_DECREF(item);
|
|
}
|
|
_PyString_Resize(&res, reslen);
|
|
return res;
|
|
}
|
|
|
|
|
|
PyDoc_STRVAR(find__doc__,
|
|
"find(s, sub [,start [,end]]) -> in\n"
|
|
"\n"
|
|
"Return the lowest index in s where substring sub is found,\n"
|
|
"such that sub is contained within s[start,end]. Optional\n"
|
|
"arguments start and end are interpreted as in slice notation.\n"
|
|
"\n"
|
|
"Return -1 on failure.");
|
|
|
|
static PyObject *
|
|
strop_find(PyObject *self, PyObject *args)
|
|
{
|
|
char *s, *sub;
|
|
Py_ssize_t len, n, i = 0, last = PY_SSIZE_T_MAX;
|
|
|
|
WARN;
|
|
if (!PyArg_ParseTuple(args, "t#t#|nn:find", &s, &len, &sub, &n, &i, &last))
|
|
return NULL;
|
|
|
|
if (last > len)
|
|
last = len;
|
|
if (last < 0)
|
|
last += len;
|
|
if (last < 0)
|
|
last = 0;
|
|
if (i < 0)
|
|
i += len;
|
|
if (i < 0)
|
|
i = 0;
|
|
|
|
if (n == 0 && i <= last)
|
|
return PyInt_FromLong((long)i);
|
|
|
|
last -= n;
|
|
for (; i <= last; ++i)
|
|
if (s[i] == sub[0] &&
|
|
(n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
|
|
return PyInt_FromLong((long)i);
|
|
|
|
return PyInt_FromLong(-1L);
|
|
}
|
|
|
|
|
|
PyDoc_STRVAR(rfind__doc__,
|
|
"rfind(s, sub [,start [,end]]) -> int\n"
|
|
"\n"
|
|
"Return the highest index in s where substring sub is found,\n"
|
|
"such that sub is contained within s[start,end]. Optional\n"
|
|
"arguments start and end are interpreted as in slice notation.\n"
|
|
"\n"
|
|
"Return -1 on failure.");
|
|
|
|
static PyObject *
|
|
strop_rfind(PyObject *self, PyObject *args)
|
|
{
|
|
char *s, *sub;
|
|
Py_ssize_t len, n, j;
|
|
Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
|
|
|
|
WARN;
|
|
if (!PyArg_ParseTuple(args, "t#t#|nn:rfind", &s, &len, &sub, &n, &i, &last))
|
|
return NULL;
|
|
|
|
if (last > len)
|
|
last = len;
|
|
if (last < 0)
|
|
last += len;
|
|
if (last < 0)
|
|
last = 0;
|
|
if (i < 0)
|
|
i += len;
|
|
if (i < 0)
|
|
i = 0;
|
|
|
|
if (n == 0 && i <= last)
|
|
return PyInt_FromLong((long)last);
|
|
|
|
for (j = last-n; j >= i; --j)
|
|
if (s[j] == sub[0] &&
|
|
(n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
|
|
return PyInt_FromLong((long)j);
|
|
|
|
return PyInt_FromLong(-1L);
|
|
}
|
|
|
|
|
|
/* Shared implementation of strip(), lstrip() and rstrip().
 *
 * args is the string object to trim; striptype is LEFTSTRIP, RIGHTSTRIP or
 * BOTHSTRIP.  If nothing is trimmed, a new reference to args itself is
 * returned; otherwise a new string holding the trimmed slice.  Returns
 * NULL with an exception set when args cannot be read as a string.
 */
static PyObject *
do_strip(PyObject *args, int striptype)
{
    char *text;
    Py_ssize_t size;
    Py_ssize_t lo = 0;          /* first byte kept */
    Py_ssize_t hi;              /* one past the last byte kept */

    if (PyString_AsStringAndSize(args, &text, &size))
        return NULL;

    if (striptype != RIGHTSTRIP) {
        for (; lo < size; lo++) {
            if (!isspace(Py_CHARMASK(text[lo])))
                break;
        }
    }

    hi = size;
    if (striptype != LEFTSTRIP) {
        while (hi > lo && isspace(Py_CHARMASK(text[hi - 1])))
            hi--;
    }

    if (lo != 0 || hi != size)
        return PyString_FromStringAndSize(text + lo, hi - lo);

    /* Unchanged: hand back the original object. */
    Py_INCREF(args);
    return args;
}
|
|
|
|
|
|
PyDoc_STRVAR(strip__doc__,
|
|
"strip(s) -> string\n"
|
|
"\n"
|
|
"Return a copy of the string s with leading and trailing\n"
|
|
"whitespace removed.");
|
|
|
|
static PyObject *
|
|
strop_strip(PyObject *self, PyObject *args)
|
|
{
|
|
WARN;
|
|
return do_strip(args, BOTHSTRIP);
|
|
}
|
|
|
|
|
|
PyDoc_STRVAR(lstrip__doc__,
|
|
"lstrip(s) -> string\n"
|
|
"\n"
|
|
"Return a copy of the string s with leading whitespace removed.");
|
|
|
|
static PyObject *
|
|
strop_lstrip(PyObject *self, PyObject *args)
|
|
{
|
|
WARN;
|
|
return do_strip(args, LEFTSTRIP);
|
|
}
|
|
|
|
|
|
PyDoc_STRVAR(rstrip__doc__,
|
|
"rstrip(s) -> string\n"
|
|
"\n"
|
|
"Return a copy of the string s with trailing whitespace removed.");
|
|
|
|
static PyObject *
|
|
strop_rstrip(PyObject *self, PyObject *args)
|
|
{
|
|
WARN;
|
|
return do_strip(args, RIGHTSTRIP);
|
|
}
|
|
|
|
|
|
PyDoc_STRVAR(lower__doc__,
|
|
"lower(s) -> string\n"
|
|
"\n"
|
|
"Return a copy of the string s converted to lowercase.");
|
|
|
|
static PyObject *
|
|
strop_lower(PyObject *self, PyObject *args)
|
|
{
|
|
char *s, *s_new;
|
|
Py_ssize_t i, n;
|
|
PyObject *newstr;
|
|
int changed;
|
|
|
|
WARN;
|
|
if (PyString_AsStringAndSize(args, &s, &n))
|
|
return NULL;
|
|
newstr = PyString_FromStringAndSize(NULL, n);
|
|
if (newstr == NULL)
|
|
return NULL;
|
|
s_new = PyString_AsString(newstr);
|
|
changed = 0;
|
|
for (i = 0; i < n; i++) {
|
|
int c = Py_CHARMASK(*s++);
|
|
if (isupper(c)) {
|
|
changed = 1;
|
|
*s_new = tolower(c);
|
|
} else
|
|
*s_new = c;
|
|
s_new++;
|
|
}
|
|
if (!changed) {
|
|
Py_DECREF(newstr);
|
|
Py_INCREF(args);
|
|
return args;
|
|
}
|
|
return newstr;
|
|
}
|
|
|
|
|
|
PyDoc_STRVAR(upper__doc__,
|
|
"upper(s) -> string\n"
|
|
"\n"
|
|
"Return a copy of the string s converted to uppercase.");
|
|
|
|
static PyObject *
|
|
strop_upper(PyObject *self, PyObject *args)
|
|
{
|
|
char *s, *s_new;
|
|
Py_ssize_t i, n;
|
|
PyObject *newstr;
|
|
int changed;
|
|
|
|
WARN;
|
|
if (PyString_AsStringAndSize(args, &s, &n))
|
|
return NULL;
|
|
newstr = PyString_FromStringAndSize(NULL, n);
|
|
if (newstr == NULL)
|
|
return NULL;
|
|
s_new = PyString_AsString(newstr);
|
|
changed = 0;
|
|
for (i = 0; i < n; i++) {
|
|
int c = Py_CHARMASK(*s++);
|
|
if (islower(c)) {
|
|
changed = 1;
|
|
*s_new = toupper(c);
|
|
} else
|
|
*s_new = c;
|
|
s_new++;
|
|
}
|
|
if (!changed) {
|
|
Py_DECREF(newstr);
|
|
Py_INCREF(args);
|
|
return args;
|
|
}
|
|
return newstr;
|
|
}
|
|
|
|
|
|
PyDoc_STRVAR(capitalize__doc__,
|
|
"capitalize(s) -> string\n"
|
|
"\n"
|
|
"Return a copy of the string s with only its first character\n"
|
|
"capitalized.");
|
|
|
|
static PyObject *
|
|
strop_capitalize(PyObject *self, PyObject *args)
|
|
{
|
|
char *s, *s_new;
|
|
Py_ssize_t i, n;
|
|
PyObject *newstr;
|
|
int changed;
|
|
|
|
WARN;
|
|
if (PyString_AsStringAndSize(args, &s, &n))
|
|
return NULL;
|
|
newstr = PyString_FromStringAndSize(NULL, n);
|
|
if (newstr == NULL)
|
|
return NULL;
|
|
s_new = PyString_AsString(newstr);
|
|
changed = 0;
|
|
if (0 < n) {
|
|
int c = Py_CHARMASK(*s++);
|
|
if (islower(c)) {
|
|
changed = 1;
|
|
*s_new = toupper(c);
|
|
} else
|
|
*s_new = c;
|
|
s_new++;
|
|
}
|
|
for (i = 1; i < n; i++) {
|
|
int c = Py_CHARMASK(*s++);
|
|
if (isupper(c)) {
|
|
changed = 1;
|
|
*s_new = tolower(c);
|
|
} else
|
|
*s_new = c;
|
|
s_new++;
|
|
}
|
|
if (!changed) {
|
|
Py_DECREF(newstr);
|
|
Py_INCREF(args);
|
|
return args;
|
|
}
|
|
return newstr;
|
|
}
|
|
|
|
|
|
PyDoc_STRVAR(expandtabs__doc__,
|
|
"expandtabs(string, [tabsize]) -> string\n"
|
|
"\n"
|
|
"Expand tabs in a string, i.e. replace them by one or more spaces,\n"
|
|
"depending on the current column and the given tab size (default 8).\n"
|
|
"The column number is reset to zero after each newline occurring in the\n"
|
|
"string. This doesn't understand other non-printing characters.");
|
|
|
|
static PyObject *
|
|
strop_expandtabs(PyObject *self, PyObject *args)
|
|
{
|
|
/* Original by Fredrik Lundh */
|
|
char* e;
|
|
char* p;
|
|
char* q;
|
|
Py_ssize_t i, j;
|
|
PyObject* out;
|
|
char* string;
|
|
Py_ssize_t stringlen;
|
|
int tabsize = 8;
|
|
|
|
WARN;
|
|
/* Get arguments */
|
|
if (!PyArg_ParseTuple(args, "s#|i:expandtabs", &string, &stringlen, &tabsize))
|
|
return NULL;
|
|
if (tabsize < 1) {
|
|
PyErr_SetString(PyExc_ValueError,
|
|
"tabsize must be at least 1");
|
|
return NULL;
|
|
}
|
|
|
|
/* First pass: determine size of output string */
|
|
i = j = 0; /* j: current column; i: total of previous lines */
|
|
e = string + stringlen;
|
|
for (p = string; p < e; p++) {
|
|
if (*p == '\t') {
|
|
Py_ssize_t incr = tabsize - (j%tabsize);
|
|
if (j > PY_SSIZE_T_MAX - incr)
|
|
goto overflow;
|
|
j += incr;
|
|
} else {
|
|
if (j > PY_SSIZE_T_MAX - 1)
|
|
goto overflow;
|
|
j++;
|
|
if (*p == '\n') {
|
|
if (i > PY_SSIZE_T_MAX - j)
|
|
goto overflow;
|
|
i += j;
|
|
j = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (i > PY_SSIZE_T_MAX - j)
|
|
goto overflow;
|
|
|
|
/* Second pass: create output string and fill it */
|
|
out = PyString_FromStringAndSize(NULL, i+j);
|
|
if (out == NULL)
|
|
return NULL;
|
|
|
|
i = 0;
|
|
q = PyString_AS_STRING(out);
|
|
|
|
for (p = string; p < e; p++) {
|
|
if (*p == '\t') {
|
|
j = tabsize - (i%tabsize);
|
|
i += j;
|
|
while (j-- > 0)
|
|
*q++ = ' ';
|
|
} else {
|
|
*q++ = *p;
|
|
i++;
|
|
if (*p == '\n')
|
|
i = 0;
|
|
}
|
|
}
|
|
|
|
return out;
|
|
overflow:
|
|
PyErr_SetString(PyExc_OverflowError, "result is too long");
|
|
return NULL;
|
|
}
|
|
|
|
|
|
PyDoc_STRVAR(count__doc__,
|
|
"count(s, sub[, start[, end]]) -> int\n"
|
|
"\n"
|
|
"Return the number of occurrences of substring sub in string\n"
|
|
"s[start:end]. Optional arguments start and end are\n"
|
|
"interpreted as in slice notation.");
|
|
|
|
static PyObject *
|
|
strop_count(PyObject *self, PyObject *args)
|
|
{
|
|
char *s, *sub;
|
|
Py_ssize_t len, n;
|
|
Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
|
|
Py_ssize_t m, r;
|
|
|
|
WARN;
|
|
if (!PyArg_ParseTuple(args, "t#t#|nn:count", &s, &len, &sub, &n, &i, &last))
|
|
return NULL;
|
|
if (last > len)
|
|
last = len;
|
|
if (last < 0)
|
|
last += len;
|
|
if (last < 0)
|
|
last = 0;
|
|
if (i < 0)
|
|
i += len;
|
|
if (i < 0)
|
|
i = 0;
|
|
m = last + 1 - n;
|
|
if (n == 0)
|
|
return PyInt_FromLong((long) (m-i));
|
|
|
|
r = 0;
|
|
while (i < m) {
|
|
if (!memcmp(s+i, sub, n)) {
|
|
r++;
|
|
i += n;
|
|
} else {
|
|
i++;
|
|
}
|
|
}
|
|
return PyInt_FromLong((long) r);
|
|
}
|
|
|
|
|
|
PyDoc_STRVAR(swapcase__doc__,
|
|
"swapcase(s) -> string\n"
|
|
"\n"
|
|
"Return a copy of the string s with upper case characters\n"
|
|
"converted to lowercase and vice versa.");
|
|
|
|
static PyObject *
|
|
strop_swapcase(PyObject *self, PyObject *args)
|
|
{
|
|
char *s, *s_new;
|
|
Py_ssize_t i, n;
|
|
PyObject *newstr;
|
|
int changed;
|
|
|
|
WARN;
|
|
if (PyString_AsStringAndSize(args, &s, &n))
|
|
return NULL;
|
|
newstr = PyString_FromStringAndSize(NULL, n);
|
|
if (newstr == NULL)
|
|
return NULL;
|
|
s_new = PyString_AsString(newstr);
|
|
changed = 0;
|
|
for (i = 0; i < n; i++) {
|
|
int c = Py_CHARMASK(*s++);
|
|
if (islower(c)) {
|
|
changed = 1;
|
|
*s_new = toupper(c);
|
|
}
|
|
else if (isupper(c)) {
|
|
changed = 1;
|
|
*s_new = tolower(c);
|
|
}
|
|
else
|
|
*s_new = c;
|
|
s_new++;
|
|
}
|
|
if (!changed) {
|
|
Py_DECREF(newstr);
|
|
Py_INCREF(args);
|
|
return args;
|
|
}
|
|
return newstr;
|
|
}
|
|
|
|
|
|
PyDoc_STRVAR(atoi__doc__,
|
|
"atoi(s [,base]) -> int\n"
|
|
"\n"
|
|
"Return the integer represented by the string s in the given\n"
|
|
"base, which defaults to 10. The string s must consist of one\n"
|
|
"or more digits, possibly preceded by a sign. If base is 0, it\n"
|
|
"is chosen from the leading characters of s, 0 for octal, 0x or\n"
|
|
"0X for hexadecimal. If base is 16, a preceding 0x or 0X is\n"
|
|
"accepted.");
|
|
|
|
static PyObject *
|
|
strop_atoi(PyObject *self, PyObject *args)
|
|
{
|
|
char *s, *end;
|
|
int base = 10;
|
|
long x;
|
|
char buffer[256]; /* For errors */
|
|
|
|
WARN;
|
|
if (!PyArg_ParseTuple(args, "s|i:atoi", &s, &base))
|
|
return NULL;
|
|
|
|
if ((base != 0 && base < 2) || base > 36) {
|
|
PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
|
|
return NULL;
|
|
}
|
|
|
|
while (*s && isspace(Py_CHARMASK(*s)))
|
|
s++;
|
|
errno = 0;
|
|
if (base == 0 && s[0] == '0')
|
|
x = (long) PyOS_strtoul(s, &end, base);
|
|
else
|
|
x = PyOS_strtol(s, &end, base);
|
|
if (end == s || !isalnum(Py_CHARMASK(end[-1])))
|
|
goto bad;
|
|
while (*end && isspace(Py_CHARMASK(*end)))
|
|
end++;
|
|
if (*end != '\0') {
|
|
bad:
|
|
PyOS_snprintf(buffer, sizeof(buffer),
|
|
"invalid literal for atoi(): %.200s", s);
|
|
PyErr_SetString(PyExc_ValueError, buffer);
|
|
return NULL;
|
|
}
|
|
else if (errno != 0) {
|
|
PyOS_snprintf(buffer, sizeof(buffer),
|
|
"atoi() literal too large: %.200s", s);
|
|
PyErr_SetString(PyExc_ValueError, buffer);
|
|
return NULL;
|
|
}
|
|
return PyInt_FromLong(x);
|
|
}
|
|
|
|
|
|
PyDoc_STRVAR(atol__doc__,
|
|
"atol(s [,base]) -> long\n"
|
|
"\n"
|
|
"Return the long integer represented by the string s in the\n"
|
|
"given base, which defaults to 10. The string s must consist\n"
|
|
"of one or more digits, possibly preceded by a sign. If base\n"
|
|
"is 0, it is chosen from the leading characters of s, 0 for\n"
|
|
"octal, 0x or 0X for hexadecimal. If base is 16, a preceding\n"
|
|
"0x or 0X is accepted. A trailing L or l is not accepted,\n"
|
|
"unless base is 0.");
|
|
|
|
static PyObject *
|
|
strop_atol(PyObject *self, PyObject *args)
|
|
{
|
|
char *s, *end;
|
|
int base = 10;
|
|
PyObject *x;
|
|
char buffer[256]; /* For errors */
|
|
|
|
WARN;
|
|
if (!PyArg_ParseTuple(args, "s|i:atol", &s, &base))
|
|
return NULL;
|
|
|
|
if ((base != 0 && base < 2) || base > 36) {
|
|
PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
|
|
return NULL;
|
|
}
|
|
|
|
while (*s && isspace(Py_CHARMASK(*s)))
|
|
s++;
|
|
if (s[0] == '\0') {
|
|
PyErr_SetString(PyExc_ValueError, "empty string for atol()");
|
|
return NULL;
|
|
}
|
|
x = PyLong_FromString(s, &end, base);
|
|
if (x == NULL)
|
|
return NULL;
|
|
if (base == 0 && (*end == 'l' || *end == 'L'))
|
|
end++;
|
|
while (*end && isspace(Py_CHARMASK(*end)))
|
|
end++;
|
|
if (*end != '\0') {
|
|
PyOS_snprintf(buffer, sizeof(buffer),
|
|
"invalid literal for atol(): %.200s", s);
|
|
PyErr_SetString(PyExc_ValueError, buffer);
|
|
Py_DECREF(x);
|
|
return NULL;
|
|
}
|
|
return x;
|
|
}
|
|
|
|
|
|
PyDoc_STRVAR(atof__doc__,
|
|
"atof(s) -> float\n"
|
|
"\n"
|
|
"Return the floating point number represented by the string s.");
|
|
|
|
static PyObject *
|
|
strop_atof(PyObject *self, PyObject *args)
|
|
{
|
|
char *s, *end;
|
|
double x;
|
|
char buffer[256]; /* For errors */
|
|
|
|
WARN;
|
|
if (!PyArg_ParseTuple(args, "s:atof", &s))
|
|
return NULL;
|
|
while (*s && isspace(Py_CHARMASK(*s)))
|
|
s++;
|
|
if (s[0] == '\0') {
|
|
PyErr_SetString(PyExc_ValueError, "empty string for atof()");
|
|
return NULL;
|
|
}
|
|
|
|
PyFPE_START_PROTECT("strop_atof", return 0)
|
|
x = PyOS_string_to_double(s, &end, PyExc_OverflowError);
|
|
PyFPE_END_PROTECT(x)
|
|
if (x == -1 && PyErr_Occurred())
|
|
return NULL;
|
|
while (*end && isspace(Py_CHARMASK(*end)))
|
|
end++;
|
|
if (*end != '\0') {
|
|
PyOS_snprintf(buffer, sizeof(buffer),
|
|
"invalid literal for atof(): %.200s", s);
|
|
PyErr_SetString(PyExc_ValueError, buffer);
|
|
return NULL;
|
|
}
|
|
return PyFloat_FromDouble(x);
|
|
}
|
|
|
|
|
|
PyDoc_STRVAR(maketrans__doc__,
|
|
"maketrans(frm, to) -> string\n"
|
|
"\n"
|
|
"Return a translation table (a string of 256 bytes long)\n"
|
|
"suitable for use in string.translate. The strings frm and to\n"
|
|
"must be of the same length.");
|
|
|
|
static PyObject *
|
|
strop_maketrans(PyObject *self, PyObject *args)
|
|
{
|
|
unsigned char *c, *from=NULL, *to=NULL;
|
|
Py_ssize_t i, fromlen=0, tolen=0;
|
|
PyObject *result;
|
|
|
|
if (!PyArg_ParseTuple(args, "t#t#:maketrans", &from, &fromlen, &to, &tolen))
|
|
return NULL;
|
|
|
|
if (fromlen != tolen) {
|
|
PyErr_SetString(PyExc_ValueError,
|
|
"maketrans arguments must have same length");
|
|
return NULL;
|
|
}
|
|
|
|
result = PyString_FromStringAndSize((char *)NULL, 256);
|
|
if (result == NULL)
|
|
return NULL;
|
|
c = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);
|
|
for (i = 0; i < 256; i++)
|
|
c[i]=(unsigned char)i;
|
|
for (i = 0; i < fromlen; i++)
|
|
c[from[i]]=to[i];
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
PyDoc_STRVAR(translate__doc__,
|
|
"translate(s,table [,deletechars]) -> string\n"
|
|
"\n"
|
|
"Return a copy of the string s, where all characters occurring\n"
|
|
"in the optional argument deletechars are removed, and the\n"
|
|
"remaining characters have been mapped through the given\n"
|
|
"translation table, which must be a string of length 256.");
|
|
|
|
static PyObject *
|
|
strop_translate(PyObject *self, PyObject *args)
|
|
{
|
|
register char *input, *table, *output;
|
|
Py_ssize_t i;
|
|
int c, changed = 0;
|
|
PyObject *input_obj;
|
|
char *table1, *output_start, *del_table=NULL;
|
|
Py_ssize_t inlen, tablen, dellen = 0;
|
|
PyObject *result;
|
|
int trans_table[256];
|
|
|
|
WARN;
|
|
if (!PyArg_ParseTuple(args, "St#|t#:translate", &input_obj,
|
|
&table1, &tablen, &del_table, &dellen))
|
|
return NULL;
|
|
if (tablen != 256) {
|
|
PyErr_SetString(PyExc_ValueError,
|
|
"translation table must be 256 characters long");
|
|
return NULL;
|
|
}
|
|
|
|
table = table1;
|
|
inlen = PyString_GET_SIZE(input_obj);
|
|
result = PyString_FromStringAndSize((char *)NULL, inlen);
|
|
if (result == NULL)
|
|
return NULL;
|
|
output_start = output = PyString_AsString(result);
|
|
input = PyString_AsString(input_obj);
|
|
|
|
if (dellen == 0) {
|
|
/* If no deletions are required, use faster code */
|
|
for (i = inlen; --i >= 0; ) {
|
|
c = Py_CHARMASK(*input++);
|
|
if (Py_CHARMASK((*output++ = table[c])) != c)
|
|
changed = 1;
|
|
}
|
|
if (changed)
|
|
return result;
|
|
Py_DECREF(result);
|
|
Py_INCREF(input_obj);
|
|
return input_obj;
|
|
}
|
|
|
|
for (i = 0; i < 256; i++)
|
|
trans_table[i] = Py_CHARMASK(table[i]);
|
|
|
|
for (i = 0; i < dellen; i++)
|
|
trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
|
|
|
|
for (i = inlen; --i >= 0; ) {
|
|
c = Py_CHARMASK(*input++);
|
|
if (trans_table[c] != -1)
|
|
if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
|
|
continue;
|
|
changed = 1;
|
|
}
|
|
if (!changed) {
|
|
Py_DECREF(result);
|
|
Py_INCREF(input_obj);
|
|
return input_obj;
|
|
}
|
|
/* Fix the size of the resulting string */
|
|
if (inlen > 0)
|
|
_PyString_Resize(&result, output - output_start);
|
|
return result;
|
|
}
|
|
|
|
|
|
/* What follows is used for implementing replace(). Perry Stoll. */
|
|
|
|
/*
|
|
mymemfind
|
|
|
|
strstr replacement for arbitrary blocks of memory.
|
|
|
|
Locates the first occurrence in the memory pointed to by MEM of the
|
|
contents of memory pointed to by PAT. Returns the index into MEM if
|
|
found, or -1 if not found. If len of PAT is greater than length of
|
|
MEM, the function returns -1.
|
|
*/
|
|
static Py_ssize_t
|
|
mymemfind(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
|
|
{
|
|
register Py_ssize_t ii;
|
|
|
|
/* pattern can not occur in the last pat_len-1 chars */
|
|
len -= pat_len;
|
|
|
|
for (ii = 0; ii <= len; ii++) {
|
|
if (mem[ii] == pat[0] &&
|
|
(pat_len == 1 ||
|
|
memcmp(&mem[ii+1], &pat[1], pat_len-1) == 0)) {
|
|
return ii;
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
/*
|
|
mymemcnt
|
|
|
|
Return the number of distinct times PAT is found in MEM.
|
|
meaning mem=1111 and pat==11 returns 2.
|
|
mem=11111 and pat==11 also return 2.
|
|
*/
|
|
static Py_ssize_t
|
|
mymemcnt(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
|
|
{
|
|
register Py_ssize_t offset = 0;
|
|
Py_ssize_t nfound = 0;
|
|
|
|
while (len >= 0) {
|
|
offset = mymemfind(mem, len, pat, pat_len);
|
|
if (offset == -1)
|
|
break;
|
|
mem += offset + pat_len;
|
|
len -= offset + pat_len;
|
|
nfound++;
|
|
}
|
|
return nfound;
|
|
}
|
|
|
|
/*
|
|
mymemreplace
|
|
|
|
Return a string in which all occurrences of PAT in memory STR are
|
|
replaced with SUB.
|
|
|
|
If length of PAT is less than length of STR or there are no occurrences
|
|
of PAT in STR, then the original string is returned. Otherwise, a new
|
|
string is allocated here and returned.
|
|
|
|
on return, out_len is:
|
|
the length of output string, or
|
|
-1 if the input string is returned, or
|
|
unchanged if an error occurs (no memory).
|
|
|
|
return value is:
|
|
the new string allocated locally, or
|
|
NULL if an error occurred.
|
|
*/
|
|
static char *
|
|
mymemreplace(const char *str, Py_ssize_t len, /* input string */
|
|
const char *pat, Py_ssize_t pat_len, /* pattern string to find */
|
|
const char *sub, Py_ssize_t sub_len, /* substitution string */
|
|
Py_ssize_t count, /* number of replacements */
|
|
Py_ssize_t *out_len)
|
|
{
|
|
char *out_s;
|
|
char *new_s;
|
|
Py_ssize_t nfound, offset, new_len;
|
|
|
|
if (len == 0 || pat_len > len)
|
|
goto return_same;
|
|
|
|
/* find length of output string */
|
|
nfound = mymemcnt(str, len, pat, pat_len);
|
|
if (count < 0)
|
|
count = PY_SSIZE_T_MAX;
|
|
else if (nfound > count)
|
|
nfound = count;
|
|
if (nfound == 0)
|
|
goto return_same;
|
|
|
|
new_len = len + nfound*(sub_len - pat_len);
|
|
if (new_len == 0) {
|
|
/* Have to allocate something for the caller to free(). */
|
|
out_s = (char *)PyMem_MALLOC(1);
|
|
if (out_s == NULL)
|
|
return NULL;
|
|
out_s[0] = '\0';
|
|
}
|
|
else {
|
|
assert(new_len > 0);
|
|
new_s = (char *)PyMem_MALLOC(new_len);
|
|
if (new_s == NULL)
|
|
return NULL;
|
|
out_s = new_s;
|
|
|
|
for (; count > 0 && len > 0; --count) {
|
|
/* find index of next instance of pattern */
|
|
offset = mymemfind(str, len, pat, pat_len);
|
|
if (offset == -1)
|
|
break;
|
|
|
|
/* copy non matching part of input string */
|
|
memcpy(new_s, str, offset);
|
|
str += offset + pat_len;
|
|
len -= offset + pat_len;
|
|
|
|
/* copy substitute into the output string */
|
|
new_s += offset;
|
|
memcpy(new_s, sub, sub_len);
|
|
new_s += sub_len;
|
|
}
|
|
/* copy any remaining values into output string */
|
|
if (len > 0)
|
|
memcpy(new_s, str, len);
|
|
}
|
|
*out_len = new_len;
|
|
return out_s;
|
|
|
|
return_same:
|
|
*out_len = -1;
|
|
return (char *)str; /* cast away const */
|
|
}
|
|
|
|
|
|
PyDoc_STRVAR(replace__doc__,
|
|
"replace (str, old, new[, maxsplit]) -> string\n"
|
|
"\n"
|
|
"Return a copy of string str with all occurrences of substring\n"
|
|
"old replaced by new. If the optional argument maxsplit is\n"
|
|
"given, only the first maxsplit occurrences are replaced.");
|
|
|
|
static PyObject *
|
|
strop_replace(PyObject *self, PyObject *args)
|
|
{
|
|
char *str, *pat,*sub,*new_s;
|
|
Py_ssize_t len,pat_len,sub_len,out_len;
|
|
Py_ssize_t count = -1;
|
|
PyObject *newstr;
|
|
|
|
WARN;
|
|
if (!PyArg_ParseTuple(args, "t#t#t#|n:replace",
|
|
&str, &len, &pat, &pat_len, &sub, &sub_len,
|
|
&count))
|
|
return NULL;
|
|
if (pat_len <= 0) {
|
|
PyErr_SetString(PyExc_ValueError, "empty pattern string");
|
|
return NULL;
|
|
}
|
|
/* CAUTION: strop treats a replace count of 0 as infinity, unlke
|
|
* current (2.1) string.py and string methods. Preserve this for
|
|
* ... well, hard to say for what <wink>.
|
|
*/
|
|
if (count == 0)
|
|
count = -1;
|
|
new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
|
|
if (new_s == NULL) {
|
|
PyErr_NoMemory();
|
|
return NULL;
|
|
}
|
|
if (out_len == -1) {
|
|
/* we're returning another reference to the input string */
|
|
newstr = PyTuple_GetItem(args, 0);
|
|
Py_XINCREF(newstr);
|
|
}
|
|
else {
|
|
newstr = PyString_FromStringAndSize(new_s, out_len);
|
|
PyMem_FREE(new_s);
|
|
}
|
|
return newstr;
|
|
}
|
|
|
|
|
|
/* List of functions defined in the module */
|
|
|
|
static PyMethodDef
|
|
strop_methods[] = {
|
|
{"atof", strop_atof, METH_VARARGS, atof__doc__},
|
|
{"atoi", strop_atoi, METH_VARARGS, atoi__doc__},
|
|
{"atol", strop_atol, METH_VARARGS, atol__doc__},
|
|
{"capitalize", strop_capitalize, METH_O, capitalize__doc__},
|
|
{"count", strop_count, METH_VARARGS, count__doc__},
|
|
{"expandtabs", strop_expandtabs, METH_VARARGS, expandtabs__doc__},
|
|
{"find", strop_find, METH_VARARGS, find__doc__},
|
|
{"join", strop_joinfields, METH_VARARGS, joinfields__doc__},
|
|
{"joinfields", strop_joinfields, METH_VARARGS, joinfields__doc__},
|
|
{"lstrip", strop_lstrip, METH_O, lstrip__doc__},
|
|
{"lower", strop_lower, METH_O, lower__doc__},
|
|
{"maketrans", strop_maketrans, METH_VARARGS, maketrans__doc__},
|
|
{"replace", strop_replace, METH_VARARGS, replace__doc__},
|
|
{"rfind", strop_rfind, METH_VARARGS, rfind__doc__},
|
|
{"rstrip", strop_rstrip, METH_O, rstrip__doc__},
|
|
{"split", strop_splitfields, METH_VARARGS, splitfields__doc__},
|
|
{"splitfields", strop_splitfields, METH_VARARGS, splitfields__doc__},
|
|
{"strip", strop_strip, METH_O, strip__doc__},
|
|
{"swapcase", strop_swapcase, METH_O, swapcase__doc__},
|
|
{"translate", strop_translate, METH_VARARGS, translate__doc__},
|
|
{"upper", strop_upper, METH_O, upper__doc__},
|
|
{NULL, NULL} /* sentinel */
|
|
};
|
|
|
|
|
|
PyMODINIT_FUNC
|
|
initstrop(void)
|
|
{
|
|
PyObject *m, *s;
|
|
char buf[256];
|
|
int c, n;
|
|
m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
|
|
(PyObject*)NULL, PYTHON_API_VERSION);
|
|
if (m == NULL)
|
|
return;
|
|
|
|
/* Create 'whitespace' object */
|
|
n = 0;
|
|
for (c = 0; c < 256; c++) {
|
|
if (isspace(c))
|
|
buf[n++] = c;
|
|
}
|
|
s = PyString_FromStringAndSize(buf, n);
|
|
if (s)
|
|
PyModule_AddObject(m, "whitespace", s);
|
|
|
|
/* Create 'lowercase' object */
|
|
n = 0;
|
|
for (c = 0; c < 256; c++) {
|
|
if (islower(c))
|
|
buf[n++] = c;
|
|
}
|
|
s = PyString_FromStringAndSize(buf, n);
|
|
if (s)
|
|
PyModule_AddObject(m, "lowercase", s);
|
|
|
|
/* Create 'uppercase' object */
|
|
n = 0;
|
|
for (c = 0; c < 256; c++) {
|
|
if (isupper(c))
|
|
buf[n++] = c;
|
|
}
|
|
s = PyString_FromStringAndSize(buf, n);
|
|
if (s)
|
|
PyModule_AddObject(m, "uppercase", s);
|
|
}
|