mirror of
https://github.com/akheron/jansson.git
synced 2025-04-06 05:55:05 +00:00
Implement JSON_ENSURE_ASCII encoding flag
With this flag, all Unicode characters outside the ASCII range are escaped.
This commit is contained in:
parent
d67aeb9739
commit
50031440a3
7 changed files with 150 additions and 24 deletions
|
@ -519,6 +519,13 @@ can be ORed together to obtain *flags*.
|
|||
|
||||
.. versionadded:: 1.2
|
||||
|
||||
``JSON_ENSURE_ASCII``
|
||||
If this flag is used, the output is guaranteed to consist only of
|
||||
ASCII characters. This is achieved by escaping all Unicode
|
||||
characters outside the ASCII range.
|
||||
|
||||
.. versionadded:: 1.2
|
||||
|
||||
The following functions perform the actual JSON encoding. The result
|
||||
is in UTF-8.
|
||||
|
||||
|
|
67
src/dump.c
67
src/dump.c
|
@ -14,6 +14,7 @@
|
|||
#include <jansson.h>
|
||||
#include "jansson_private.h"
|
||||
#include "strbuffer.h"
|
||||
#include "utf.h"
|
||||
|
||||
#define MAX_INTEGER_STR_LENGTH 100
|
||||
#define MAX_REAL_STR_LENGTH 100
|
||||
|
@ -65,34 +66,49 @@ static int dump_indent(unsigned long flags, int depth, int space, dump_func dump
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int dump_string(const char *str, dump_func dump, void *data)
|
||||
static int dump_string(const char *str, int ascii, dump_func dump, void *data)
|
||||
{
|
||||
const char *end;
|
||||
const char *pos, *end;
|
||||
int32_t codepoint;
|
||||
|
||||
if(dump("\"", 1, data))
|
||||
return -1;
|
||||
|
||||
end = str;
|
||||
end = pos = str;
|
||||
while(1)
|
||||
{
|
||||
const char *text;
|
||||
char seq[7];
|
||||
char seq[13];
|
||||
int length;
|
||||
|
||||
while(*end && *end != '\\' && *end != '"' && (unsigned char)*end > 0x1F)
|
||||
end++;
|
||||
while(*end)
|
||||
{
|
||||
end = utf8_iterate(pos, &codepoint);
|
||||
if(!end)
|
||||
return -1;
|
||||
|
||||
if(end != str) {
|
||||
if(dump(str, end - str, data))
|
||||
/* mandatory escape or control char */
|
||||
if(codepoint == '\\' || codepoint == '"' || codepoint < 0x20)
|
||||
break;
|
||||
|
||||
/* non-ASCII */
|
||||
if(ascii && codepoint > 0x7F)
|
||||
break;
|
||||
|
||||
pos = end;
|
||||
}
|
||||
|
||||
if(pos != str) {
|
||||
if(dump(str, pos - str, data))
|
||||
return -1;
|
||||
}
|
||||
|
||||
if(!*end)
|
||||
if(end == pos)
|
||||
break;
|
||||
|
||||
/* handle \, ", and control codes */
|
||||
length = 2;
|
||||
switch(*end)
|
||||
switch(codepoint)
|
||||
{
|
||||
case '\\': text = "\\\\"; break;
|
||||
case '\"': text = "\\\""; break;
|
||||
|
@ -103,9 +119,27 @@ static int dump_string(const char *str, dump_func dump, void *data)
|
|||
case '\t': text = "\\t"; break;
|
||||
default:
|
||||
{
|
||||
sprintf(seq, "\\u00%02x", *end);
|
||||
/* codepoint is in BMP */
|
||||
if(codepoint < 0x10000)
|
||||
{
|
||||
sprintf(seq, "\\u%04x", codepoint);
|
||||
length = 6;
|
||||
}
|
||||
|
||||
/* not in BMP -> construct a UTF-16 surrogate pair */
|
||||
else
|
||||
{
|
||||
int32_t first, last;
|
||||
|
||||
codepoint -= 0x10000;
|
||||
first = 0xD800 | ((codepoint & 0xffc00) >> 10);
|
||||
last = 0xDC00 | (codepoint & 0x003ff);
|
||||
|
||||
sprintf(seq, "\\u%04x\\u%04x", first, last);
|
||||
length = 12;
|
||||
}
|
||||
|
||||
text = seq;
|
||||
length = 6;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -113,8 +147,7 @@ static int dump_string(const char *str, dump_func dump, void *data)
|
|||
if(dump(text, length, data))
|
||||
return -1;
|
||||
|
||||
end++;
|
||||
str = end;
|
||||
str = pos = end;
|
||||
}
|
||||
|
||||
return dump("\"", 1, data);
|
||||
|
@ -123,6 +156,8 @@ static int dump_string(const char *str, dump_func dump, void *data)
|
|||
static int do_dump(const json_t *json, unsigned long flags, int depth,
|
||||
dump_func dump, void *data)
|
||||
{
|
||||
int ascii = flags & JSON_ENSURE_ASCII ? 1 : 0;
|
||||
|
||||
switch(json_typeof(json)) {
|
||||
case JSON_NULL:
|
||||
return dump("null", 4, data);
|
||||
|
@ -158,7 +193,7 @@ static int do_dump(const json_t *json, unsigned long flags, int depth,
|
|||
}
|
||||
|
||||
case JSON_STRING:
|
||||
return dump_string(json_string_value(json), dump, data);
|
||||
return dump_string(json_string_value(json), ascii, dump, data);
|
||||
|
||||
case JSON_ARRAY:
|
||||
{
|
||||
|
@ -238,7 +273,7 @@ static int do_dump(const json_t *json, unsigned long flags, int depth,
|
|||
{
|
||||
void *next = json_object_iter_next((json_t *)json, iter);
|
||||
|
||||
dump_string(json_object_iter_key(iter), dump, data);
|
||||
dump_string(json_object_iter_key(iter), ascii, dump, data);
|
||||
if(dump(separator, separator_length, data) ||
|
||||
do_dump(json_object_iter_value(iter), flags, depth + 1,
|
||||
dump, data))
|
||||
|
|
|
@ -141,8 +141,9 @@ json_t *json_loads(const char *input, json_error_t *error);
|
|||
json_t *json_loadf(FILE *input, json_error_t *error);
|
||||
json_t *json_load_file(const char *path, json_error_t *error);
|
||||
|
||||
/*
 * Encoding flags; they can be ORed together to obtain the `flags`
 * argument of the dump functions.
 *
 * JSON_INDENT(n) keeps only the low 8 bits of n. The argument is
 * parenthesized so that an expression such as JSON_INDENT(a | b) is
 * masked as a whole instead of being parsed as a | (b & 0xFF).
 */
#define JSON_INDENT(n)      ((n) & 0xFF)
#define JSON_COMPACT        0x100
/* Escape every character outside the ASCII range in the output. */
#define JSON_ENSURE_ASCII   0x200
|
||||
|
||||
char *json_dumps(const json_t *json, unsigned long flags);
|
||||
int json_dumpf(const json_t *json, FILE *output, unsigned long flags);
|
||||
|
|
|
@ -149,7 +149,7 @@ static char stream_get(stream_t *stream, json_error_t *error)
|
|||
for(i = 1; i < count; i++)
|
||||
stream->buffer[i] = stream->get(stream->data);
|
||||
|
||||
if(!utf8_check_full(stream->buffer, count))
|
||||
if(!utf8_check_full(stream->buffer, count, NULL))
|
||||
goto out;
|
||||
|
||||
stream->stream_pos += count;
|
||||
|
|
33
src/utf.c
33
src/utf.c
|
@ -80,7 +80,7 @@ int utf8_check_first(char byte)
|
|||
}
|
||||
}
|
||||
|
||||
int utf8_check_full(const char *buffer, int size)
|
||||
int utf8_check_full(const char *buffer, int size, int32_t *codepoint)
|
||||
{
|
||||
int i;
|
||||
int32_t value = 0;
|
||||
|
@ -130,9 +130,38 @@ int utf8_check_full(const char *buffer, int size)
|
|||
return 0;
|
||||
}
|
||||
|
||||
if(codepoint)
|
||||
*codepoint = value;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Decode the character at the start of the NUL-terminated string
 * `buffer`.
 *
 * Returns a pointer to the byte following the decoded character and,
 * when `codepoint` is non-NULL, stores the decoded value through it.
 * At the end of the string (`*buffer` is NUL) `buffer` itself is
 * returned and `*codepoint` is left untouched. Returns NULL when
 * utf8_check_first() or utf8_check_full() rejects the input.
 */
const char *utf8_iterate(const char *buffer, int32_t *codepoint)
{
    int32_t decoded;
    int length;

    /* end of string: nothing to decode */
    if(*buffer == '\0')
        return buffer;

    /* the first byte determines how many bytes the sequence occupies */
    length = utf8_check_first(buffer[0]);
    if(length <= 0)
        return NULL;

    if(length > 1)
    {
        /* multi-byte sequence: validate it and extract its value */
        if(!utf8_check_full(buffer, length, &decoded))
            return NULL;
    }
    else
    {
        /* single byte: the byte value is the code point */
        decoded = (unsigned char)buffer[0];
    }

    if(codepoint != NULL)
        *codepoint = decoded;

    return &buffer[length];
}
|
||||
|
||||
int utf8_check_string(const char *string, int length)
|
||||
{
|
||||
int i;
|
||||
|
@ -150,7 +179,7 @@ int utf8_check_string(const char *string, int length)
|
|||
if(i + count > length)
|
||||
return 0;
|
||||
|
||||
if(!utf8_check_full(&string[i], count))
|
||||
if(!utf8_check_full(&string[i], count, NULL))
|
||||
return 0;
|
||||
|
||||
i += count - 1;
|
||||
|
|
|
@ -11,7 +11,8 @@
|
|||
int utf8_encode(int codepoint, char *buffer, int *size);
|
||||
|
||||
int utf8_check_first(char byte);
|
||||
int utf8_check_full(const char *buffer, int size);
|
||||
int utf8_check_full(const char *buffer, int size, int32_t *codepoint);
|
||||
const char *utf8_iterate(const char *buffer, int32_t *codepoint);
|
||||
|
||||
int utf8_check_string(const char *string, int length);
|
||||
|
||||
|
|
|
@ -131,8 +131,8 @@ static void test_compact()
|
|||
|
||||
#define INDENTED_COMPACT_OBJECT \
|
||||
"{\n" \
|
||||
" \"a\":1,\n" \
|
||||
" \"b\":2\n" \
|
||||
" \"a\":1,\n" \
|
||||
" \"b\":2\n" \
|
||||
"}"
|
||||
#define INDENTED_COMPACT_ARRAY \
|
||||
"[\n" \
|
||||
|
@ -163,12 +163,65 @@ static void test_compact_indent()
|
|||
json_decref(array);
|
||||
}
|
||||
|
||||
|
||||
static const char *test_ensure_ascii_data[][2] = {
|
||||
/*
|
||||
{ "input", "output" }
|
||||
*/
|
||||
|
||||
/* ascii */
|
||||
{ "foo", "foo" },
|
||||
|
||||
/* BMP */
|
||||
{ "\xc3\xa4 \xc3\xb6 \xc3\xa5", "\\u00e4 \\u00f6 \\u00e5" },
|
||||
{ "foo \xc3\xa4\xc3\xa5", "foo \\u00e4\\u00e5" },
|
||||
{ "\xc3\xa4\xc3\xa5 foo", "\\u00e4\\u00e5 foo" },
|
||||
{ "\xc3\xa4 foo \xc3\xa5", "\\u00e4 foo \\u00e5" },
|
||||
|
||||
/* non-BMP */
|
||||
{ "clef g: \xf0\x9d\x84\x9e", "clef g: \\ud834\\udd1e" },
|
||||
};
|
||||
|
||||
static void test_ensure_ascii()
|
||||
{
|
||||
int i;
|
||||
int num_tests = sizeof(test_ensure_ascii_data) / sizeof(const char *) / 2;
|
||||
|
||||
for(i = 0; i < num_tests; i++) {
|
||||
json_t *array, *string;
|
||||
const char *input, *output;
|
||||
char *result, *stripped;
|
||||
|
||||
input = test_ensure_ascii_data[i][0];
|
||||
output = test_ensure_ascii_data[i][1];
|
||||
|
||||
array = json_array();
|
||||
string = json_string(input);
|
||||
if(!array || !string)
|
||||
fail("unable to create json values");
|
||||
|
||||
json_array_append(array, string);
|
||||
result = json_dumps(array, JSON_ENSURE_ASCII);
|
||||
|
||||
/* strip leading [" and trailing "] */
|
||||
stripped = &result[2];
|
||||
stripped[strlen(stripped) - 2] = '\0';
|
||||
|
||||
if(strcmp(stripped, output) != 0) {
|
||||
free(result);
|
||||
fail("the result of json_dumps is invalid");
|
||||
}
|
||||
free(result);
|
||||
}
|
||||
}
|
||||
|
||||
/* Test driver: calls each dump test in turn and then returns 0. */
int main(void)
{
    test_normal();
    test_indent();
    test_compact();
    test_compact_indent();
    test_ensure_ascii();

    return 0;
}
|
||||
|
|
Loading…
Add table
Reference in a new issue