LMMS
Loading...
Searching...
No Matches
n3.c
Go to the documentation of this file.
/*
  Copyright 2011-2020 David Robillard <d@drobilla.net>

  Permission to use, copy, modify, and/or distribute this software for any
  purpose with or without fee is hereby granted, provided that the above
  copyright notice and this permission notice appear in all copies.

  THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
16
17#include "byte_source.h"
18#include "reader.h"
19#include "serd_internal.h"
20#include "stack.h"
21#include "string_utils.h"
22#include "uri_utils.h"
23
24#include "serd/serd.h"
25
26#include <assert.h>
27#include <stdbool.h>
28#include <stdint.h>
29#include <stdio.h>
30#include <stdlib.h>
31#include <string.h>
32
// Evaluate `exp`, store the result in `st`, and return it from the enclosing
// function if it is non-zero
#define TRY(st, exp)      \
  do {                    \
    if (((st) = (exp))) { \
      return (st);        \
    }                     \
  } while (0)
39
40static inline bool
42{
43 return reader->syntax == SERD_TURTLE || reader->syntax == SERD_TRIG;
44}
45
46static SerdStatus
47read_collection(SerdReader* reader, ReadContext ctx, Ref* dest);
48
49static SerdStatus
50read_predicateObjectList(SerdReader* reader, ReadContext ctx, bool* ate_dot);
51
52static inline uint8_t
54{
55 const int c = peek_byte(reader);
56 if (is_xdigit(c)) {
57 return (uint8_t)eat_byte_safe(reader, c);
58 }
59
60 r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid hexadecimal digit `%c'\n", c);
61 return 0;
62}
63
64// Read UCHAR escape, initial \ is already eaten by caller
65static inline SerdStatus
66read_UCHAR(SerdReader* reader, Ref dest, uint32_t* char_code)
67{
68 const int b = peek_byte(reader);
69 unsigned length = 0;
70 switch (b) {
71 case 'U':
72 length = 8;
73 break;
74 case 'u':
75 length = 4;
76 break;
77 default:
79 }
80
81 eat_byte_safe(reader, b);
82
83 uint8_t buf[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
84 for (unsigned i = 0; i < length; ++i) {
85 if (!(buf[i] = read_HEX(reader))) {
87 }
88 }
89
90 char* endptr = NULL;
91 const uint32_t code = (uint32_t)strtoul((const char*)buf, &endptr, 16);
92 assert(endptr == (char*)buf + length);
93
94 unsigned size = 0;
95 if (code < 0x00000080) {
96 size = 1;
97 } else if (code < 0x00000800) {
98 size = 2;
99 } else if (code < 0x00010000) {
100 size = 3;
101 } else if (code < 0x00110000) {
102 size = 4;
103 } else {
104 r_err(reader,
106 "unicode character 0x%X out of range\n",
107 code);
108 push_bytes(reader, dest, replacement_char, 3);
109 *char_code = 0xFFFD;
110 return SERD_SUCCESS;
111 }
112
113 // Build output in buf
114 // (Note # of bytes = # of leading 1 bits in first byte)
115 uint32_t c = code;
116 switch (size) {
117 case 4:
118 buf[3] = (uint8_t)(0x80u | (c & 0x3Fu));
119 c >>= 6;
120 c |= (16 << 12); // set bit 4
121 /* fallthru */
122 case 3:
123 buf[2] = (uint8_t)(0x80u | (c & 0x3Fu));
124 c >>= 6;
125 c |= (32 << 6); // set bit 5
126 /* fallthru */
127 case 2:
128 buf[1] = (uint8_t)(0x80u | (c & 0x3Fu));
129 c >>= 6;
130 c |= 0xC0; // set bits 6 and 7
131 /* fallthru */
132 case 1:
133 buf[0] = (uint8_t)c;
134 /* fallthru */
135 default:
136 break;
137 }
138
139 push_bytes(reader, dest, buf, size);
140 *char_code = code;
141 return SERD_SUCCESS;
142}
143
144// Read ECHAR escape, initial \ is already eaten by caller
145static inline SerdStatus
147{
148 const int c = peek_byte(reader);
149 switch (c) {
150 case 't':
151 eat_byte_safe(reader, 't');
152 push_byte(reader, dest, '\t');
153 return SERD_SUCCESS;
154 case 'b':
155 eat_byte_safe(reader, 'b');
156 push_byte(reader, dest, '\b');
157 return SERD_SUCCESS;
158 case 'n':
159 *flags |= SERD_HAS_NEWLINE;
160 eat_byte_safe(reader, 'n');
161 push_byte(reader, dest, '\n');
162 return SERD_SUCCESS;
163 case 'r':
164 *flags |= SERD_HAS_NEWLINE;
165 eat_byte_safe(reader, 'r');
166 push_byte(reader, dest, '\r');
167 return SERD_SUCCESS;
168 case 'f':
169 eat_byte_safe(reader, 'f');
170 push_byte(reader, dest, '\f');
171 return SERD_SUCCESS;
172 case '\\':
173 case '"':
174 case '\'':
175 push_byte(reader, dest, eat_byte_safe(reader, c));
176 return SERD_SUCCESS;
177 default:
178 return SERD_ERR_BAD_SYNTAX;
179 }
180}
181
182static inline SerdStatus
183bad_char(SerdReader* reader, const char* fmt, uint8_t c)
184{
185 // Skip bytes until the next start byte
186 for (int b = peek_byte(reader); b != EOF && ((uint8_t)b & 0x80);) {
187 eat_byte_safe(reader, b);
188 b = peek_byte(reader);
189 }
190
191 r_err(reader, SERD_ERR_BAD_SYNTAX, fmt, c);
192 return reader->strict ? SERD_ERR_BAD_SYNTAX : SERD_FAILURE;
193}
194
195static SerdStatus
197{
199 if (*size <= 1 || *size > 4) {
200 return bad_char(reader, "invalid UTF-8 start 0x%X\n", c);
201 }
202
203 bytes[0] = c;
204 for (unsigned i = 1; i < *size; ++i) {
205 const int b = peek_byte(reader);
206 if (b == EOF || ((uint8_t)b & 0x80) == 0) {
207 return bad_char(reader, "invalid UTF-8 continuation 0x%X\n", (uint8_t)b);
208 }
209
210 eat_byte_safe(reader, b);
211 bytes[i] = (uint8_t)b;
212 }
213
214 return SERD_SUCCESS;
215}
216
217static SerdStatus
219{
220 uint32_t size = 0;
221 uint8_t bytes[4] = {0, 0, 0, 0};
222 SerdStatus st = read_utf8_bytes(reader, bytes, &size, c);
223 if (st) {
224 push_bytes(reader, dest, replacement_char, 3);
225 } else {
226 push_bytes(reader, dest, bytes, size);
227 }
228
229 return st;
230}
231
232static SerdStatus
234{
235 uint32_t size = 0;
236 uint8_t bytes[4] = {0, 0, 0, 0};
237 SerdStatus st = read_utf8_bytes(reader, bytes, &size, c);
238 if (st) {
239 push_bytes(reader, dest, replacement_char, 3);
240 return st;
241 }
242
243 push_bytes(reader, dest, bytes, size);
245 return st;
246}
247
248// Read one character (possibly multi-byte)
249// The first byte, c, has already been eaten by caller
250static inline SerdStatus
252{
253 if (!(c & 0x80)) {
254 switch (c) {
255 case 0xA:
256 case 0xD:
257 *flags |= SERD_HAS_NEWLINE;
258 break;
259 case '"':
260 case '\'':
261 *flags |= SERD_HAS_QUOTE;
262 break;
263 default:
264 break;
265 }
266 return push_byte(reader, dest, c);
267 }
268
269 return read_utf8_character(reader, dest, c);
270}
271
272// [10] comment ::= '#' ( [^#xA #xD] )*
273static void
275{
276 eat_byte_safe(reader, '#');
277 int c = 0;
278 while (((c = peek_byte(reader)) != 0xA) && c != 0xD && c != EOF && c) {
279 eat_byte_safe(reader, c);
280 }
281}
282
283// [24] ws ::= #x9 | #xA | #xD | #x20 | comment
284static inline bool
286{
287 const int c = peek_byte(reader);
288 switch (c) {
289 case 0x9:
290 case 0xA:
291 case 0xD:
292 case 0x20:
293 eat_byte_safe(reader, c);
294 return true;
295 case '#':
296 read_comment(reader);
297 return true;
298 default:
299 return false;
300 }
301}
302
303static inline bool
305{
306 while (read_ws(reader)) {
307 }
308
309 return true;
310}
311
312static inline bool
313peek_delim(SerdReader* reader, const char delim)
314{
315 read_ws_star(reader);
316 return peek_byte(reader) == delim;
317}
318
319static inline bool
320eat_delim(SerdReader* reader, const char delim)
321{
322 if (peek_delim(reader, delim)) {
323 eat_byte_safe(reader, delim);
324 return read_ws_star(reader);
325 }
326
327 return false;
328}
329
330// STRING_LITERAL_LONG_QUOTE and STRING_LITERAL_LONG_SINGLE_QUOTE
331// Initial triple quotes are already eaten by caller
332static SerdStatus
334 Ref ref,
335 SerdNodeFlags* flags,
336 uint8_t q)
337{
339
340 while (!(st && reader->strict)) {
341 const int c = peek_byte(reader);
342 if (c == '\\') {
343 eat_byte_safe(reader, c);
344 uint32_t code = 0;
345 if ((st = read_ECHAR(reader, ref, flags)) &&
346 (st = read_UCHAR(reader, ref, &code))) {
347 return r_err(reader, st, "invalid escape `\\%c'\n", peek_byte(reader));
348 }
349 } else if (c == q) {
350 eat_byte_safe(reader, q);
351 const int q2 = eat_byte_safe(reader, peek_byte(reader));
352 const int q3 = peek_byte(reader);
353 if (q2 == q && q3 == q) { // End of string
354 eat_byte_safe(reader, q3);
355 break;
356 }
357 *flags |= SERD_HAS_QUOTE;
358 push_byte(reader, ref, c);
359 st = read_character(reader, ref, flags, (uint8_t)q2);
360 } else if (c == EOF) {
361 return r_err(reader, SERD_ERR_BAD_SYNTAX, "end of file in long string\n");
362 } else {
363 st =
364 read_character(reader, ref, flags, (uint8_t)eat_byte_safe(reader, c));
365 }
366 }
367
368 return (st && reader->strict) ? st : SERD_SUCCESS;
369}
370
371// STRING_LITERAL_QUOTE and STRING_LITERAL_SINGLE_QUOTE
372// Initial quote is already eaten by caller
373static SerdStatus
375 Ref ref,
376 SerdNodeFlags* flags,
377 uint8_t q)
378{
380
381 while (!(st && reader->strict)) {
382 const int c = peek_byte(reader);
383 uint32_t code = 0;
384 switch (c) {
385 case EOF:
386 return r_err(
387 reader, SERD_ERR_BAD_SYNTAX, "end of file in short string\n");
388 case '\n':
389 case '\r':
390 return r_err(reader, SERD_ERR_BAD_SYNTAX, "line end in short string\n");
391 case '\\':
392 eat_byte_safe(reader, c);
393 if ((st = read_ECHAR(reader, ref, flags)) &&
394 (st = read_UCHAR(reader, ref, &code))) {
395 return r_err(reader, st, "invalid escape `\\%c'\n", peek_byte(reader));
396 }
397 break;
398 default:
399 if (c == q) {
400 eat_byte_check(reader, q);
401 return SERD_SUCCESS;
402 } else {
403 st =
404 read_character(reader, ref, flags, (uint8_t)eat_byte_safe(reader, c));
405 }
406 }
407 }
408
409 return st ? st
411}
412
413static SerdStatus
415{
416 const int q1 = peek_byte(reader);
417 eat_byte_safe(reader, q1);
418
419 const int q2 = peek_byte(reader);
420 if (q2 == EOF) {
421 return r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected end of file\n");
422 }
423
424 if (q2 != q1) { // Short string (not triple quoted)
425 return read_STRING_LITERAL(reader, node, flags, (uint8_t)q1);
426 }
427
428 eat_byte_safe(reader, q2);
429 const int q3 = peek_byte(reader);
430 if (q3 == EOF) {
431 return r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected end of file\n");
432 }
433
434 if (q3 != q1) { // Empty short string ("" or '')
435 return SERD_SUCCESS;
436 }
437
438 if (!fancy_syntax(reader)) {
439 return r_err(
440 reader, SERD_ERR_BAD_SYNTAX, "syntax does not support long literals\n");
441 }
442
443 eat_byte_safe(reader, q3);
444 return read_STRING_LITERAL_LONG(reader, node, flags, (uint8_t)q1);
445}
446
// Return true iff code point c is in one of the non-ASCII ranges accepted as
// a name start character.  ASCII letters are NOT covered here; callers check
// for those separately.
static inline bool
is_PN_CHARS_BASE(const uint32_t c)
{
  return ((c >= 0x00C0 && c <= 0x00D6) || (c >= 0x00D8 && c <= 0x00F6) ||
          (c >= 0x00F8 && c <= 0x02FF) || (c >= 0x0370 && c <= 0x037D) ||
          (c >= 0x037F && c <= 0x1FFF) || (c >= 0x200C && c <= 0x200D) ||
          (c >= 0x2070 && c <= 0x218F) || (c >= 0x2C00 && c <= 0x2FEF) ||
          (c >= 0x3001 && c <= 0xD7FF) || (c >= 0xF900 && c <= 0xFDCF) ||
          (c >= 0xFDF0 && c <= 0xFFFD) || (c >= 0x10000 && c <= 0xEFFFF));
}
457
458static SerdStatus
460{
461 uint32_t code = 0;
462 const int c = peek_byte(reader);
464 if (is_alpha(c)) {
465 push_byte(reader, dest, eat_byte_safe(reader, c));
466 } else if (c == EOF || !(c & 0x80)) {
467 return SERD_FAILURE;
468 } else if ((st = read_utf8_code(
469 reader, dest, &code, (uint8_t)eat_byte_safe(reader, c)))) {
470 return st;
471 } else if (!is_PN_CHARS_BASE(code)) {
472 r_err(
473 reader, SERD_ERR_BAD_SYNTAX, "invalid character U+%04X in name\n", code);
474 if (reader->strict) {
475 return SERD_ERR_BAD_SYNTAX;
476 }
477 }
478 return st;
479}
480
// Return true iff code point c is accepted inside a name: anything accepted
// by is_PN_CHARS_BASE(), U+00B7, U+0300..U+036F, or U+203F..U+2040
static inline bool
is_PN_CHARS(const uint32_t c)
{
  return (is_PN_CHARS_BASE(c) || c == 0xB7 || (c >= 0x0300 && c <= 0x036F) ||
          (c >= 0x203F && c <= 0x2040));
}
487
488static SerdStatus
490{
491 uint32_t code = 0;
492 const int c = peek_byte(reader);
494 if (is_alpha(c) || is_digit(c) || c == '_' || c == '-') {
495 push_byte(reader, dest, eat_byte_safe(reader, c));
496 } else if (c == EOF || !(c & 0x80)) {
497 return SERD_FAILURE;
498 } else if ((st = read_utf8_code(
499 reader, dest, &code, (uint8_t)eat_byte_safe(reader, c)))) {
500 return st;
501 } else if (!is_PN_CHARS(code)) {
502 return r_err(
503 reader, SERD_ERR_BAD_SYNTAX, "invalid character U+%04X in name\n", code);
504 }
505 return st;
506}
507
508static SerdStatus
510{
511 push_byte(reader, dest, eat_byte_safe(reader, '%'));
512 const uint8_t h1 = read_HEX(reader);
513 const uint8_t h2 = read_HEX(reader);
514 if (h1 && h2) {
515 push_byte(reader, dest, h1);
516 return push_byte(reader, dest, h2);
517 }
518
519 return SERD_ERR_BAD_SYNTAX;
520}
521
522static SerdStatus
524{
525 eat_byte_safe(reader, '\\');
526
527 const int c = peek_byte(reader);
528 switch (c) {
529 case '!':
530 case '#':
531 case '$':
532 case '%':
533 case '&':
534 case '\'':
535 case '(':
536 case ')':
537 case '*':
538 case '+':
539 case ',':
540 case '-':
541 case '.':
542 case '/':
543 case ';':
544 case '=':
545 case '?':
546 case '@':
547 case '_':
548 case '~':
549 push_byte(reader, dest, eat_byte_safe(reader, c));
550 break;
551 default:
552 return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid escape\n");
553 }
554
555 return SERD_SUCCESS;
556}
557
558static SerdStatus
559read_PLX(SerdReader* reader, Ref dest)
560{
561 const int c = peek_byte(reader);
562 switch (c) {
563 case '%':
564 return read_PERCENT(reader, dest);
565 case '\\':
566 return read_PN_LOCAL_ESC(reader, dest);
567 default:
568 return SERD_FAILURE;
569 }
570}
571
572static SerdStatus
573read_PN_LOCAL(SerdReader* reader, Ref dest, bool* ate_dot)
574{
575 int c = peek_byte(reader);
577 bool trailing_unescaped_dot = false;
578 switch (c) {
579 case '0':
580 case '1':
581 case '2':
582 case '3':
583 case '4':
584 case '5':
585 case '6':
586 case '7':
587 case '8':
588 case '9':
589 case ':':
590 case '_':
591 push_byte(reader, dest, eat_byte_safe(reader, c));
592 break;
593 default:
594 if ((st = read_PLX(reader, dest)) > SERD_FAILURE) {
595 return r_err(reader, st, "bad escape\n");
596 } else if (st != SERD_SUCCESS && read_PN_CHARS_BASE(reader, dest)) {
597 return SERD_FAILURE;
598 }
599 }
600
601 while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.' | ':')*
602 if (c == '.' || c == ':') {
603 push_byte(reader, dest, eat_byte_safe(reader, c));
604 } else if ((st = read_PLX(reader, dest)) > SERD_FAILURE) {
605 return r_err(reader, SERD_ERR_BAD_SYNTAX, "bad escape\n");
606 } else if (st != SERD_SUCCESS && (st = read_PN_CHARS(reader, dest))) {
607 break;
608 }
609 trailing_unescaped_dot = (c == '.');
610 }
611
612 SerdNode* const n = deref(reader, dest);
613 if (trailing_unescaped_dot) {
614 // Ate trailing dot, pop it from stack/node and inform caller
615 --n->n_bytes;
616 serd_stack_pop(&reader->stack, 1);
617 *ate_dot = true;
618 }
619
620 return (st > SERD_FAILURE) ? st : SERD_SUCCESS;
621}
622
623// Read the remainder of a PN_PREFIX after some initial characters
624static SerdStatus
626{
627 int c = 0;
628 while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.')*
629 if (c == '.') {
630 push_byte(reader, dest, eat_byte_safe(reader, c));
631 } else if (read_PN_CHARS(reader, dest)) {
632 break;
633 }
634 }
635
636 const SerdNode* const n = deref(reader, dest);
637 if (n->buf[n->n_bytes - 1] == '.' && read_PN_CHARS(reader, dest)) {
638 return r_err(reader, SERD_ERR_BAD_SYNTAX, "prefix ends with `.'\n");
639 }
640
641 return SERD_SUCCESS;
642}
643
644static SerdStatus
646{
647 if (!read_PN_CHARS_BASE(reader, dest)) {
648 return read_PN_PREFIX_tail(reader, dest);
649 }
650
651 return SERD_FAILURE;
652}
653
654static SerdStatus
656{
657 int c = peek_byte(reader);
658 if (!is_alpha(c)) {
659 return r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected `%c'\n", c);
660 }
661
662 *dest = push_node(reader, SERD_LITERAL, "", 0);
663
665 TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, c)));
666 while ((c = peek_byte(reader)) && is_alpha(c)) {
667 TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, c)));
668 }
669
670 while (peek_byte(reader) == '-') {
671 TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, '-')));
672 while ((c = peek_byte(reader)) && (is_alpha(c) || is_digit(c))) {
673 TRY(st, push_byte(reader, *dest, eat_byte_safe(reader, c)));
674 }
675 }
676
677 return SERD_SUCCESS;
678}
679
680static SerdStatus
682{
683 int c = peek_byte(reader);
684 if (!is_alpha(c)) {
685 return r_err(reader, SERD_ERR_BAD_SYNTAX, "bad IRI scheme start `%c'\n", c);
686 }
687
688 while ((c = peek_byte(reader)) != EOF) {
689 if (c == '>') {
690 return r_err(reader, SERD_ERR_BAD_SYNTAX, "missing IRI scheme\n");
691 }
692
693 if (!is_uri_scheme_char(c)) {
694 return r_err(reader,
696 "bad IRI scheme char U+%04X (%c)\n",
697 (unsigned)c,
698 (char)c);
699 }
700
701 push_byte(reader, dest, eat_byte_safe(reader, c));
702 if (c == ':') {
703 return SERD_SUCCESS; // End of scheme
704 }
705 }
706
707 return r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected end of file\n");
708}
709
710static SerdStatus
712{
713 if (!eat_byte_check(reader, '<')) {
714 return SERD_ERR_BAD_SYNTAX;
715 }
716
717 *dest = push_node(reader, SERD_URI, "", 0);
718
719 if (!fancy_syntax(reader) && read_IRIREF_scheme(reader, *dest)) {
720 *dest = pop_node(reader, *dest);
721 return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected IRI scheme\n");
722 }
723
725 uint32_t code = 0;
726 while (!st) {
727 const int c = eat_byte_safe(reader, peek_byte(reader));
728 switch (c) {
729 case '"':
730 case '<':
731 *dest = pop_node(reader, *dest);
732 return r_err(
733 reader, SERD_ERR_BAD_SYNTAX, "invalid IRI character `%c'\n", c);
734
735 case '>':
736 return SERD_SUCCESS;
737
738 case '\\':
739 if (read_UCHAR(reader, *dest, &code)) {
740 *dest = pop_node(reader, *dest);
741 return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid IRI escape\n");
742 }
743
744 switch (code) {
745 case 0:
746 case ' ':
747 case '<':
748 case '>':
749 *dest = pop_node(reader, *dest);
750 return r_err(reader,
752 "invalid escaped IRI character U+%04X\n",
753 code);
754 default:
755 break;
756 }
757 break;
758
759 case '^':
760 case '`':
761 case '{':
762 case '|':
763 case '}':
764 *dest = pop_node(reader, *dest);
765 return r_err(
766 reader, SERD_ERR_BAD_SYNTAX, "invalid IRI character `%c'\n", c);
767
768 default:
769 if (c <= 0x20) {
770 r_err(reader,
772 "invalid IRI character (escape %%%02X)\n",
773 (unsigned)c);
774 if (reader->strict) {
775 *dest = pop_node(reader, *dest);
776 return SERD_ERR_BAD_SYNTAX;
777 }
778 st = SERD_FAILURE;
779 push_byte(reader, *dest, c);
780 } else if (!(c & 0x80)) {
781 push_byte(reader, *dest, c);
782 } else if (read_utf8_character(reader, *dest, (uint8_t)c)) {
783 if (reader->strict) {
784 *dest = pop_node(reader, *dest);
785 return SERD_ERR_BAD_SYNTAX;
786 }
787 }
788 }
789 }
790
791 *dest = pop_node(reader, *dest);
792 return st;
793}
794
795static SerdStatus
796read_PrefixedName(SerdReader* reader, Ref dest, bool read_prefix, bool* ate_dot)
797{
799 if (read_prefix && ((st = read_PN_PREFIX(reader, dest)) > SERD_FAILURE)) {
800 return st;
801 }
802
803 if (peek_byte(reader) != ':') {
804 return SERD_FAILURE;
805 }
806
807 push_byte(reader, dest, eat_byte_safe(reader, ':'));
808
809 st = read_PN_LOCAL(reader, dest, ate_dot);
810
811 return (st > SERD_FAILURE) ? st : SERD_SUCCESS;
812}
813
814static SerdStatus
815read_0_9(SerdReader* reader, Ref str, bool at_least_one)
816{
817 unsigned count = 0;
819 for (int c = 0; is_digit((c = peek_byte(reader))); ++count) {
820 TRY(st, push_byte(reader, str, eat_byte_safe(reader, c)));
821 }
822
823 if (at_least_one && count == 0) {
824 return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected digit\n");
825 }
826
827 return SERD_SUCCESS;
828}
829
830static SerdStatus
831read_number(SerdReader* reader, Ref* dest, Ref* datatype, bool* ate_dot)
832{
833#define XSD_DECIMAL NS_XSD "decimal"
834#define XSD_DOUBLE NS_XSD "double"
835#define XSD_INTEGER NS_XSD "integer"
836
837 *dest = push_node(reader, SERD_LITERAL, "", 0);
838
840 int c = peek_byte(reader);
841 bool has_decimal = false;
842 if (c == '-' || c == '+') {
843 push_byte(reader, *dest, eat_byte_safe(reader, c));
844 }
845 if ((c = peek_byte(reader)) == '.') {
846 has_decimal = true;
847 // decimal case 2 (e.g. '.0' or `-.0' or `+.0')
848 push_byte(reader, *dest, eat_byte_safe(reader, c));
849 TRY(st, read_0_9(reader, *dest, true));
850 } else {
851 // all other cases ::= ( '-' | '+' ) [0-9]+ ( . )? ( [0-9]+ )? ...
852 TRY(st, read_0_9(reader, *dest, true));
853 if ((c = peek_byte(reader)) == '.') {
854 has_decimal = true;
855
856 // Annoyingly, dot can be end of statement, so tentatively eat
857 eat_byte_safe(reader, c);
858 c = peek_byte(reader);
859 if (!is_digit(c) && c != 'e' && c != 'E') {
860 *ate_dot = true; // Force caller to deal with stupid grammar
861 return SERD_SUCCESS; // Next byte is not a number character
862 }
863
864 push_byte(reader, *dest, '.');
865 read_0_9(reader, *dest, false);
866 }
867 }
868 c = peek_byte(reader);
869 if (c == 'e' || c == 'E') {
870 // double
871 push_byte(reader, *dest, eat_byte_safe(reader, c));
872 switch ((c = peek_byte(reader))) {
873 case '+':
874 case '-':
875 push_byte(reader, *dest, eat_byte_safe(reader, c));
876 default:
877 break;
878 }
879 TRY(st, read_0_9(reader, *dest, true));
880 *datatype = push_node(reader, SERD_URI, XSD_DOUBLE, sizeof(XSD_DOUBLE) - 1);
881 } else if (has_decimal) {
882 *datatype =
883 push_node(reader, SERD_URI, XSD_DECIMAL, sizeof(XSD_DECIMAL) - 1);
884 } else {
885 *datatype =
886 push_node(reader, SERD_URI, XSD_INTEGER, sizeof(XSD_INTEGER) - 1);
887 }
888
889 return SERD_SUCCESS;
890}
891
892static SerdStatus
893read_iri(SerdReader* reader, Ref* dest, bool* ate_dot)
894{
895 switch (peek_byte(reader)) {
896 case '<':
897 return read_IRIREF(reader, dest);
898 default:
899 *dest = push_node(reader, SERD_CURIE, "", 0);
900 return read_PrefixedName(reader, *dest, true, ate_dot);
901 }
902}
903
904static SerdStatus
906 Ref* dest,
907 Ref* datatype,
908 Ref* lang,
909 SerdNodeFlags* flags,
910 bool* ate_dot)
911{
912 *dest = push_node(reader, SERD_LITERAL, "", 0);
913
914 SerdStatus st = read_String(reader, *dest, flags);
915 if (st) {
916 *dest = pop_node(reader, *dest);
917 return st;
918 }
919
920 switch (peek_byte(reader)) {
921 case '@':
922 eat_byte_safe(reader, '@');
923 if ((st = read_LANGTAG(reader, lang))) {
924 *datatype = pop_node(reader, *datatype);
925 *lang = pop_node(reader, *lang);
926 *dest = pop_node(reader, *dest);
927 return r_err(reader, st, "bad literal\n");
928 }
929 break;
930 case '^':
931 eat_byte_safe(reader, '^');
932 eat_byte_check(reader, '^');
933 if ((st = read_iri(reader, datatype, ate_dot))) {
934 *datatype = pop_node(reader, *datatype);
935 *lang = pop_node(reader, *lang);
936 *dest = pop_node(reader, *dest);
937 return r_err(reader, st, "bad literal\n");
938 }
939 break;
940 }
941
942 return SERD_SUCCESS;
943}
944
945static SerdStatus
946read_verb(SerdReader* reader, Ref* dest)
947{
948 if (peek_byte(reader) == '<') {
949 return read_IRIREF(reader, dest);
950 }
951
952 /* Either a qname, or "a". Read the prefix first, and if it is in fact
953 "a", produce that instead.
954 */
955 *dest = push_node(reader, SERD_CURIE, "", 0);
956
957 SerdStatus st = read_PN_PREFIX(reader, *dest);
958 bool ate_dot = false;
959 SerdNode* node = deref(reader, *dest);
960 const int next = peek_byte(reader);
961 if (!st && node->n_bytes == 1 && node->buf[0] == 'a' && next != ':' &&
962 !is_PN_CHARS_BASE((uint32_t)next)) {
963 pop_node(reader, *dest);
964 *dest = push_node(reader, SERD_URI, NS_RDF "type", 47);
965 return SERD_SUCCESS;
966 }
967
968 if (st > SERD_FAILURE || read_PrefixedName(reader, *dest, false, &ate_dot) ||
969 ate_dot) {
970 *dest = pop_node(reader, *dest);
971 return r_err(reader, SERD_ERR_BAD_SYNTAX, "bad verb\n");
972 }
973
974 return SERD_SUCCESS;
975}
976
977static SerdStatus
978read_BLANK_NODE_LABEL(SerdReader* reader, Ref* dest, bool* ate_dot)
979{
980 eat_byte_safe(reader, '_');
981 eat_byte_check(reader, ':');
982
983 const Ref ref = *dest =
984 push_node(reader,
986 reader->bprefix ? (char*)reader->bprefix : "",
987 reader->bprefix_len);
988
989 int c = peek_byte(reader); // First: (PN_CHARS | '_' | [0-9])
990 if (is_digit(c) || c == '_') {
991 push_byte(reader, ref, eat_byte_safe(reader, c));
992 } else if (read_PN_CHARS(reader, ref)) {
993 *dest = pop_node(reader, *dest);
994 return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid name start\n");
995 }
996
997 while ((c = peek_byte(reader))) { // Middle: (PN_CHARS | '.')*
998 if (c == '.') {
999 push_byte(reader, ref, eat_byte_safe(reader, c));
1000 } else if (read_PN_CHARS(reader, ref)) {
1001 break;
1002 }
1003 }
1004
1005 SerdNode* n = deref(reader, ref);
1006 if (n->buf[n->n_bytes - 1] == '.' && read_PN_CHARS(reader, ref)) {
1007 // Ate trailing dot, pop it from stack/node and inform caller
1008 --n->n_bytes;
1009 serd_stack_pop(&reader->stack, 1);
1010 *ate_dot = true;
1011 }
1012
1013 if (fancy_syntax(reader)) {
1014 if (is_digit(n->buf[reader->bprefix_len + 1])) {
1015 if ((n->buf[reader->bprefix_len]) == 'b') {
1016 ((char*)n->buf)[reader->bprefix_len] = 'B'; // Prevent clash
1017 reader->seen_genid = true;
1018 } else if (reader->seen_genid && n->buf[reader->bprefix_len] == 'B') {
1019 *dest = pop_node(reader, *dest);
1020 return r_err(reader,
1022 "found both `b' and `B' blank IDs, prefix required\n");
1023 }
1024 }
1025 }
1026
1027 return SERD_SUCCESS;
1028}
1029
1030static Ref
1032{
1033 eat_byte_safe(reader, '=');
1034 if (eat_byte_check(reader, '=') != '=') {
1035 r_err(reader, SERD_ERR_BAD_SYNTAX, "expected `='\n");
1036 return 0;
1037 }
1038
1039 Ref subject = 0;
1040 bool ate_dot = false;
1041 read_ws_star(reader);
1042 read_iri(reader, &subject, &ate_dot);
1043 return subject;
1044}
1045
1046static SerdStatus
1047read_anon(SerdReader* reader, ReadContext ctx, bool subject, Ref* dest)
1048{
1049 const SerdStatementFlags old_flags = *ctx.flags;
1050 bool empty = false;
1051 eat_byte_safe(reader, '[');
1052 if ((empty = peek_delim(reader, ']'))) {
1053 *ctx.flags |= (subject) ? SERD_EMPTY_S : SERD_EMPTY_O;
1054 } else {
1055 *ctx.flags |= (subject) ? SERD_ANON_S_BEGIN : SERD_ANON_O_BEGIN;
1056 if (peek_delim(reader, '=')) {
1057 if (!(*dest = read_blankName(reader)) || !eat_delim(reader, ';')) {
1058 return SERD_ERR_BAD_SYNTAX;
1059 }
1060 }
1061 }
1062
1063 if (!*dest) {
1064 *dest = blank_id(reader);
1065 }
1066
1068 if (ctx.subject) {
1069 TRY(st, emit_statement(reader, ctx, *dest, 0, 0));
1070 }
1071
1072 ctx.subject = *dest;
1073 if (!empty) {
1074 *ctx.flags &= ~(unsigned)SERD_LIST_CONT;
1075 if (!subject) {
1076 *ctx.flags |= SERD_ANON_CONT;
1077 }
1078
1079 bool ate_dot_in_list = false;
1080 read_predicateObjectList(reader, ctx, &ate_dot_in_list);
1081 if (ate_dot_in_list) {
1082 return r_err(reader, SERD_ERR_BAD_SYNTAX, "`.' inside blank\n");
1083 }
1084
1085 read_ws_star(reader);
1086 if (reader->end_sink) {
1087 reader->end_sink(reader->handle, deref(reader, *dest));
1088 }
1089
1090 *ctx.flags = old_flags;
1091 }
1092
1093 return (eat_byte_check(reader, ']') == ']') ? SERD_SUCCESS
1095}
1096
1097/* If emit is true: recurses, calling statement_sink for every statement
1098 encountered, and leaves stack in original calling state (i.e. pops
1099 everything it pushes). */
1100static SerdStatus
1101read_object(SerdReader* reader, ReadContext* ctx, bool emit, bool* ate_dot)
1102{
1103 static const char* const XSD_BOOLEAN = NS_XSD "boolean";
1104 static const size_t XSD_BOOLEAN_LEN = 40;
1105
1106#ifndef NDEBUG
1107 const size_t orig_stack_size = reader->stack.size;
1108#endif
1109
1111
1112 bool simple = (ctx->subject != 0);
1113 SerdNode* node = NULL;
1114 Ref o = 0;
1115 Ref datatype = 0;
1116 Ref lang = 0;
1117 uint32_t flags = 0;
1118 const int c = peek_byte(reader);
1119 if (!fancy_syntax(reader)) {
1120 switch (c) {
1121 case '"':
1122 case ':':
1123 case '<':
1124 case '_':
1125 break;
1126 default:
1127 return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected: ':', '<', or '_'\n");
1128 }
1129 }
1130 switch (c) {
1131 case EOF:
1132 case ')':
1133 return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected object\n");
1134 case '[':
1135 simple = false;
1136 ret = read_anon(reader, *ctx, false, &o);
1137 break;
1138 case '(':
1139 simple = false;
1140 ret = read_collection(reader, *ctx, &o);
1141 break;
1142 case '_':
1143 ret = read_BLANK_NODE_LABEL(reader, &o, ate_dot);
1144 break;
1145 case '<':
1146 case ':':
1147 ret = read_iri(reader, &o, ate_dot);
1148 break;
1149 case '+':
1150 case '-':
1151 case '.':
1152 case '0':
1153 case '1':
1154 case '2':
1155 case '3':
1156 case '4':
1157 case '5':
1158 case '6':
1159 case '7':
1160 case '8':
1161 case '9':
1162 ret = read_number(reader, &o, &datatype, ate_dot);
1163 break;
1164 case '\"':
1165 case '\'':
1166 ret = read_literal(reader, &o, &datatype, &lang, &flags, ate_dot);
1167 break;
1168 default:
1169 /* Either a boolean literal, or a qname. Read the prefix first, and if
1170 it is in fact a "true" or "false" literal, produce that instead.
1171 */
1172 o = push_node(reader, SERD_CURIE, "", 0);
1173 while (!read_PN_CHARS_BASE(reader, o)) {
1174 }
1175 node = deref(reader, o);
1176 if ((node->n_bytes == 4 && !memcmp(node->buf, "true", 4)) ||
1177 (node->n_bytes == 5 && !memcmp(node->buf, "false", 5))) {
1178 node->type = SERD_LITERAL;
1179 datatype = push_node(reader, SERD_URI, XSD_BOOLEAN, XSD_BOOLEAN_LEN);
1180 ret = SERD_SUCCESS;
1181 } else if (read_PN_PREFIX_tail(reader, o) > SERD_FAILURE) {
1182 ret = SERD_ERR_BAD_SYNTAX;
1183 } else {
1184 if ((ret = read_PrefixedName(reader, o, false, ate_dot))) {
1185 ret = ret > SERD_FAILURE ? ret : SERD_ERR_BAD_SYNTAX;
1186 pop_node(reader, o);
1187 return r_err(reader, ret, "expected prefixed name\n");
1188 }
1189 }
1190 }
1191
1192 if (!ret && simple && o) {
1193 deref(reader, o)->flags = flags;
1194 }
1195
1196 if (!ret && emit && simple) {
1197 ret = emit_statement(reader, *ctx, o, datatype, lang);
1198 } else if (!ret && !emit) {
1199 ctx->object = o;
1200 ctx->datatype = datatype;
1201 ctx->lang = lang;
1202 return SERD_SUCCESS;
1203 }
1204
1205 pop_node(reader, lang);
1206 pop_node(reader, datatype);
1207 pop_node(reader, o);
1208#ifndef NDEBUG
1209 assert(reader->stack.size == orig_stack_size);
1210#endif
1211 return ret;
1212}
1213
1214static SerdStatus
1215read_objectList(SerdReader* reader, ReadContext ctx, bool* ate_dot)
1216{
1218 TRY(st, read_object(reader, &ctx, true, ate_dot));
1219 if (!fancy_syntax(reader) && peek_delim(reader, ',')) {
1220 return r_err(
1221 reader, SERD_ERR_BAD_SYNTAX, "syntax does not support abbreviation\n");
1222 }
1223
1224 while (!*ate_dot && eat_delim(reader, ',')) {
1225 st = read_object(reader, &ctx, true, ate_dot);
1226 }
1227
1228 return st;
1229}
1230
1231static SerdStatus
1233{
1235 while (!(st = read_verb(reader, &ctx.predicate)) && read_ws_star(reader) &&
1236 !(st = read_objectList(reader, ctx, ate_dot))) {
1237 ctx.predicate = pop_node(reader, ctx.predicate);
1238 if (*ate_dot) {
1239 return SERD_SUCCESS;
1240 }
1241
1242 bool ate_semi = false;
1243 int c = 0;
1244 do {
1245 read_ws_star(reader);
1246 switch (c = peek_byte(reader)) {
1247 case EOF:
1248 return r_err(reader, SERD_ERR_BAD_SYNTAX, "unexpected end of file\n");
1249 case '.':
1250 case ']':
1251 case '}':
1252 return SERD_SUCCESS;
1253 case ';':
1254 eat_byte_safe(reader, c);
1255 ate_semi = true;
1256 }
1257 } while (c == ';');
1258
1259 if (!ate_semi) {
1260 return r_err(reader, SERD_ERR_BAD_SYNTAX, "missing ';' or '.'\n");
1261 }
1262 }
1263
1264 ctx.predicate = pop_node(reader, ctx.predicate);
1265 return st;
1266}
1267
1268static SerdStatus
1270 ReadContext ctx,
1271 Ref n1,
1272 Ref n2,
1273 SerdStatus st)
1274{
1275 pop_node(reader, n2);
1276 pop_node(reader, n1);
1277 *ctx.flags &= ~(unsigned)SERD_LIST_CONT;
1278 if (!st) {
1279 return (eat_byte_check(reader, ')') == ')') ? SERD_SUCCESS
1281 }
1282
1283 return st;
1284}
1285
1286static SerdStatus
1288{
1290 eat_byte_safe(reader, '(');
1291
1292 bool end = peek_delim(reader, ')');
1293
1294 *dest = end ? reader->rdf_nil : blank_id(reader);
1295 if (ctx.subject) {
1296 // subject predicate _:head
1297 *ctx.flags |= (end ? 0 : SERD_LIST_O_BEGIN);
1298 TRY(st, emit_statement(reader, ctx, *dest, 0, 0));
1299 *ctx.flags |= SERD_LIST_CONT;
1300 } else {
1301 *ctx.flags |= (end ? 0 : SERD_LIST_S_BEGIN);
1302 }
1303
1304 if (end) {
1305 return end_collection(reader, ctx, 0, 0, st);
1306 }
1307
1308 /* The order of node allocation here is necessarily not in stack order,
1309 so we create two nodes and recycle them throughout. */
1310 Ref n1 = push_node_padded(reader, genid_size(reader), SERD_BLANK, "", 0);
1311 Ref n2 = 0;
1312 Ref node = n1;
1313 Ref rest = 0;
1314
1315 ctx.subject = *dest;
1316 while (!peek_delim(reader, ')')) {
1317 // _:node rdf:first object
1318 ctx.predicate = reader->rdf_first;
1319 bool ate_dot = false;
1320 if ((st = read_object(reader, &ctx, true, &ate_dot)) || ate_dot) {
1321 return end_collection(reader, ctx, n1, n2, st);
1322 }
1323
1324 if (!(end = peek_delim(reader, ')'))) {
1325 /* Give rest a new ID. Done as late as possible to ensure it is
1326 used and > IDs generated by read_object above. */
1327 if (!rest) {
1328 rest = n2 = blank_id(reader); // First pass, push
1329 } else {
1330 set_blank_id(reader, rest, genid_size(reader));
1331 }
1332 }
1333
1334 // _:node rdf:rest _:rest
1335 *ctx.flags |= SERD_LIST_CONT;
1336 ctx.predicate = reader->rdf_rest;
1337 TRY(st, emit_statement(reader, ctx, (end ? reader->rdf_nil : rest), 0, 0));
1338
1339 ctx.subject = rest; // _:node = _:rest
1340 rest = node; // _:rest = (old)_:node
1341 node = ctx.subject; // invariant
1342 }
1343
1344 return end_collection(reader, ctx, n1, n2, st);
1345}
1346
1347static SerdStatus
1348read_subject(SerdReader* reader, ReadContext ctx, Ref* dest, int* s_type)
1349{
1351 bool ate_dot = false;
1352 switch ((*s_type = peek_byte(reader))) {
1353 case '[':
1354 read_anon(reader, ctx, true, dest);
1355 break;
1356 case '(':
1357 st = read_collection(reader, ctx, dest);
1358 break;
1359 case '_':
1360 st = read_BLANK_NODE_LABEL(reader, dest, &ate_dot);
1361 break;
1362 default:
1363 st = read_iri(reader, dest, &ate_dot);
1364 }
1365
1366 if (ate_dot) {
1367 pop_node(reader, *dest);
1368 return r_err(reader, SERD_ERR_BAD_SYNTAX, "subject ends with `.'\n");
1369 }
1370
1371 return st;
1372}
1373
1374static SerdStatus
1376{
1377 bool ate_dot = false;
1378 switch (peek_byte(reader)) {
1379 case '[':
1380 eat_byte_safe(reader, '[');
1381 read_ws_star(reader);
1382 if (!eat_byte_check(reader, ']')) {
1383 return SERD_ERR_BAD_SYNTAX;
1384 }
1385 *dest = blank_id(reader);
1386 return SERD_SUCCESS;
1387 case '_':
1388 return read_BLANK_NODE_LABEL(reader, dest, &ate_dot);
1389 default:
1390 if (!read_iri(reader, dest, &ate_dot)) {
1391 return SERD_SUCCESS;
1392 } else {
1393 return r_err(reader, SERD_ERR_BAD_SYNTAX, "expected label or subject\n");
1394 }
1395 }
1396}
1397
1398static SerdStatus
1399read_triples(SerdReader* reader, ReadContext ctx, bool* ate_dot)
1400{
1402 if (ctx.subject) {
1403 read_ws_star(reader);
1404 switch (peek_byte(reader)) {
1405 case '.':
1406 *ate_dot = eat_byte_safe(reader, '.');
1407 return SERD_FAILURE;
1408 case '}':
1409 return SERD_FAILURE;
1410 }
1411 st = read_predicateObjectList(reader, ctx, ate_dot);
1412 }
1413
1414 ctx.subject = ctx.predicate = 0;
1415 return st > SERD_FAILURE ? st : SERD_SUCCESS;
1416}
1417
1418static SerdStatus
1419read_base(SerdReader* reader, bool sparql, bool token)
1420{
1422 if (token) {
1423 TRY(st, eat_string(reader, "base", 4));
1424 }
1425
1426 read_ws_star(reader);
1427
1428 Ref uri = 0;
1429 TRY(st, read_IRIREF(reader, &uri));
1430 if (reader->base_sink) {
1431 TRY(st, reader->base_sink(reader->handle, deref(reader, uri)));
1432 }
1433 pop_node(reader, uri);
1434
1435 read_ws_star(reader);
1436 if (!sparql) {
1437 return eat_byte_check(reader, '.') ? SERD_SUCCESS : SERD_ERR_BAD_SYNTAX;
1438 }
1439
1440 if (peek_byte(reader) == '.') {
1441 return r_err(reader, SERD_ERR_BAD_SYNTAX, "full stop after SPARQL BASE\n");
1442 }
1443
1444 return SERD_SUCCESS;
1445}
1446
1447static SerdStatus
1448read_prefixID(SerdReader* reader, bool sparql, bool token)
1449{
1451 if (token) {
1452 TRY(st, eat_string(reader, "prefix", 6));
1453 }
1454
1455 read_ws_star(reader);
1456 Ref name = push_node(reader, SERD_LITERAL, "", 0);
1457 if ((st = read_PN_PREFIX(reader, name)) > SERD_FAILURE) {
1458 return st;
1459 }
1460
1461 if (eat_byte_check(reader, ':') != ':') {
1462 pop_node(reader, name);
1463 return SERD_ERR_BAD_SYNTAX;
1464 }
1465
1466 read_ws_star(reader);
1467 Ref uri = 0;
1468 TRY(st, read_IRIREF(reader, &uri));
1469
1470 if (reader->prefix_sink) {
1471 st = reader->prefix_sink(
1472 reader->handle, deref(reader, name), deref(reader, uri));
1473 }
1474
1475 pop_node(reader, uri);
1476 pop_node(reader, name);
1477 if (!sparql) {
1478 read_ws_star(reader);
1479 st = eat_byte_check(reader, '.') ? SERD_SUCCESS : SERD_ERR_BAD_SYNTAX;
1480 }
1481
1482 return st;
1483}
1484
1485static SerdStatus
1487{
1488 const bool sparql = peek_byte(reader) != '@';
1489 if (!sparql) {
1490 eat_byte_safe(reader, '@');
1491 switch (peek_byte(reader)) {
1492 case 'B':
1493 case 'P':
1494 return r_err(reader, SERD_ERR_BAD_SYNTAX, "uppercase directive\n");
1495 }
1496 }
1497
1498 switch (peek_byte(reader)) {
1499 case 'B':
1500 case 'b':
1501 return read_base(reader, sparql, true);
1502 case 'P':
1503 case 'p':
1504 return read_prefixID(reader, sparql, true);
1505 default:
1506 break;
1507 }
1508
1509 return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid directive\n");
1510}
1511
1512static SerdStatus
1514{
1515 if (!eat_byte_check(reader, '{')) {
1516 return SERD_ERR_BAD_SYNTAX;
1517 }
1518
1519 read_ws_star(reader);
1520 while (peek_byte(reader) != '}') {
1521 bool ate_dot = false;
1522 int s_type = 0;
1523 ctx->subject = 0;
1524 SerdStatus st = read_subject(reader, *ctx, &ctx->subject, &s_type);
1525 if (st) {
1526 return r_err(reader, SERD_ERR_BAD_SYNTAX, "bad subject\n");
1527 }
1528
1529 if (read_triples(reader, *ctx, &ate_dot) && s_type != '[') {
1530 return r_err(
1531 reader, SERD_ERR_BAD_SYNTAX, "missing predicate object list\n");
1532 }
1533
1534 pop_node(reader, ctx->subject);
1535 read_ws_star(reader);
1536 if (peek_byte(reader) == '.') {
1537 eat_byte_safe(reader, '.');
1538 }
1539 read_ws_star(reader);
1540 }
1541
1542 eat_byte_safe(reader, '}');
1543 read_ws_star(reader);
1544 if (peek_byte(reader) == '.') {
1545 return r_err(reader, SERD_ERR_BAD_SYNTAX, "graph followed by `.'\n");
1546 }
1547
1548 return SERD_SUCCESS;
1549}
1550
1551static int
1552tokcmp(SerdReader* reader, Ref ref, const char* tok, size_t n)
1553{
1554 SerdNode* node = deref(reader, ref);
1555 if (!node || node->n_bytes != n) {
1556 return -1;
1557 }
1558
1559 return serd_strncasecmp((const char*)node->buf, tok, n);
1560}
1561
1564{
1565 SerdStatementFlags flags = 0;
1566 ReadContext ctx = {0, 0, 0, 0, 0, 0, &flags};
1567 bool ate_dot = false;
1568 int s_type = 0;
1570 read_ws_star(reader);
1571 switch (peek_byte(reader)) {
1572 case '\0':
1573 eat_byte_safe(reader, '\0');
1574 return SERD_FAILURE;
1575 case EOF:
1576 return SERD_FAILURE;
1577 case '@':
1578 if (!fancy_syntax(reader)) {
1579 return r_err(
1580 reader, SERD_ERR_BAD_SYNTAX, "syntax does not support directives\n");
1581 }
1582 TRY(st, read_directive(reader));
1583 read_ws_star(reader);
1584 break;
1585 case '{':
1586 if (reader->syntax == SERD_TRIG) {
1587 TRY(st, read_wrappedGraph(reader, &ctx));
1588 read_ws_star(reader);
1589 } else {
1590 return r_err(
1591 reader, SERD_ERR_BAD_SYNTAX, "syntax does not support graphs\n");
1592 }
1593 break;
1594 default:
1595 if ((st = read_subject(reader, ctx, &ctx.subject, &s_type)) >
1596 SERD_FAILURE) {
1597 return st;
1598 }
1599
1600 if (!tokcmp(reader, ctx.subject, "base", 4)) {
1601 st = read_base(reader, true, false);
1602 } else if (!tokcmp(reader, ctx.subject, "prefix", 6)) {
1603 st = read_prefixID(reader, true, false);
1604 } else if (!tokcmp(reader, ctx.subject, "graph", 5)) {
1605 read_ws_star(reader);
1606 TRY(st, read_labelOrSubject(reader, &ctx.graph));
1607 read_ws_star(reader);
1608 TRY(st, read_wrappedGraph(reader, &ctx));
1609 pop_node(reader, ctx.graph);
1610 ctx.graph = 0;
1611 read_ws_star(reader);
1612 } else if (read_ws_star(reader) && peek_byte(reader) == '{') {
1613 if (s_type == '(' || (s_type == '[' && !*ctx.flags)) {
1614 return r_err(reader, SERD_ERR_BAD_SYNTAX, "invalid graph name\n");
1615 }
1616 ctx.graph = ctx.subject;
1617 ctx.subject = 0;
1618 TRY(st, read_wrappedGraph(reader, &ctx));
1619 pop_node(reader, ctx.graph);
1620 read_ws_star(reader);
1621 } else if ((st = read_triples(reader, ctx, &ate_dot))) {
1622 if (st == SERD_FAILURE && s_type == '[') {
1623 return SERD_SUCCESS;
1624 }
1625
1626 if (ate_dot) {
1627 return r_err(
1628 reader, SERD_ERR_BAD_SYNTAX, "unexpected end of statement\n");
1629 }
1630
1631 return st > SERD_FAILURE ? st : SERD_ERR_BAD_SYNTAX;
1632 } else if (!ate_dot) {
1633 read_ws_star(reader);
1634 st = (eat_byte_check(reader, '.') == '.') ? SERD_SUCCESS
1636 }
1637 break;
1638 }
1639 return st;
1640}
1641
1642static void
1644{
1645 for (int c = 0; (c = peek_byte(reader)) && c != byte;) {
1646 eat_byte_safe(reader, c);
1647 }
1648}
1649
1652{
1653 while (!reader->source.eof) {
1654 const SerdStatus st = read_n3_statement(reader);
1655 if (st > SERD_FAILURE) {
1656 if (reader->strict) {
1657 return st;
1658 }
1659 skip_until(reader, '\n');
1660 }
1661 }
1662
1663 return SERD_SUCCESS;
1664}
1665
1668{
1670 while (!reader->source.eof) {
1671 SerdStatementFlags flags = 0;
1672 ReadContext ctx = {0, 0, 0, 0, 0, 0, &flags};
1673 bool ate_dot = false;
1674 int s_type = 0;
1675 read_ws_star(reader);
1676 if (peek_byte(reader) == EOF) {
1677 break;
1678 }
1679
1680 if (peek_byte(reader) == '@') {
1681 return r_err(
1682 reader, SERD_ERR_BAD_SYNTAX, "syntax does not support directives\n");
1683 }
1684
1685 // subject predicate object
1686 if ((st = read_subject(reader, ctx, &ctx.subject, &s_type)) ||
1687 !read_ws_star(reader) || (st = read_IRIREF(reader, &ctx.predicate)) ||
1688 !read_ws_star(reader) ||
1689 (st = read_object(reader, &ctx, false, &ate_dot))) {
1690 return st;
1691 }
1692
1693 if (!ate_dot) { // graphLabel?
1694 read_ws_star(reader);
1695 switch (peek_byte(reader)) {
1696 case '.':
1697 break;
1698 case '_':
1699 TRY(st, read_BLANK_NODE_LABEL(reader, &ctx.graph, &ate_dot));
1700 break;
1701 default:
1702 TRY(st, read_IRIREF(reader, &ctx.graph));
1703 }
1704
1705 // Terminating '.'
1706 read_ws_star(reader);
1707 if (!eat_byte_check(reader, '.')) {
1708 return SERD_ERR_BAD_SYNTAX;
1709 }
1710 }
1711
1712 TRY(st, emit_statement(reader, ctx, ctx.object, ctx.datatype, ctx.lang));
1713
1714 pop_node(reader, ctx.graph);
1715 pop_node(reader, ctx.lang);
1716 pop_node(reader, ctx.datatype);
1717 pop_node(reader, ctx.object);
1718 }
1719 return SERD_SUCCESS;
1720}
#define NULL
Definition CarlaBridgeFormat.cpp:30
assert(0)
#define byte
Definition blargg_source.h:87
register unsigned i
Definition inflate.c:1575
static const char * name
Definition pugl.h:1582
SerdStatus
Return status code.
Definition serd.h:100
struct SerdReaderImpl SerdReader
Streaming parser that reads a text stream and writes to a statement sink.
Definition serd.h:94
uint32_t SerdStatementFlags
Bitwise OR of SerdStatementFlag values.
Definition serd.h:133
uint32_t SerdNodeFlags
Bitwise OR of SerdNodeFlag values.
Definition serd.h:196
@ SERD_ERR_ID_CLASH
Encountered clashing blank node IDs.
Definition serd.h:107
@ SERD_ERR_BAD_SYNTAX
Invalid syntax.
Definition serd.h:104
@ SERD_FAILURE
Non-fatal failure.
Definition serd.h:102
@ SERD_SUCCESS
No error.
Definition serd.h:101
@ SERD_TURTLE
Terse triples http://www.w3.org/TR/turtle.
Definition serd.h:114
@ SERD_TRIG
Terse quads http://www.w3.org/TR/trig/.
Definition serd.h:117
@ SERD_URI
Definition serd.h:169
@ SERD_CURIE
Definition serd.h:177
@ SERD_LITERAL
Definition serd.h:159
@ SERD_BLANK
Definition serd.h:186
@ SERD_EMPTY_S
Empty blank node subject.
Definition serd.h:122
@ SERD_LIST_CONT
Continuation of list.
Definition serd.h:129
@ SERD_EMPTY_O
Empty blank node object.
Definition serd.h:123
@ SERD_ANON_S_BEGIN
Start of anonymous subject.
Definition serd.h:124
@ SERD_LIST_O_BEGIN
Start of list object.
Definition serd.h:128
@ SERD_ANON_O_BEGIN
Start of anonymous object.
Definition serd.h:125
@ SERD_ANON_CONT
Continuation of anonymous node.
Definition serd.h:126
@ SERD_LIST_S_BEGIN
Start of list subject.
Definition serd.h:127
@ SERD_HAS_QUOTE
Contains quotes ('"').
Definition serd.h:192
@ SERD_HAS_NEWLINE
Contains line breaks ('\n' or '\r').
Definition serd.h:191
#define read_object
#define read_literal
SerdStatus r_err(SerdReader *reader, SerdStatus st, const char *fmt,...)
Definition reader.c:32
SerdNode * deref(SerdReader *reader, const Ref ref)
Definition reader.c:116
SerdStatus emit_statement(SerdReader *reader, ReadContext ctx, Ref o, Ref d, Ref l)
Definition reader.c:143
void set_blank_id(SerdReader *reader, Ref ref, size_t buf_size)
Definition reader.c:44
Ref push_node(SerdReader *reader, SerdType type, const char *str, size_t n_bytes)
Definition reader.c:110
Ref pop_node(SerdReader *reader, Ref ref)
Definition reader.c:127
Ref blank_id(SerdReader *reader)
Definition reader.c:59
Ref push_node_padded(SerdReader *reader, size_t maxlen, SerdType type, const char *str, size_t n_bytes)
Definition reader.c:83
size_t genid_size(SerdReader *reader)
Definition reader.c:53
#define NS_RDF
Definition serd_internal.h:25
#define NS_XSD
Definition serd_internal.h:24
static SordNode * uri(SordWorld *world, int num)
Definition sord_test.c:47
unsigned int uint32_t
Definition mid.cpp:100
unsigned char uint8_t
Definition mid.cpp:98
static bool read_ws_star(SerdReader *reader)
Definition n3.c:304
static uint8_t read_HEX(SerdReader *reader)
Definition n3.c:53
static SerdStatus read_number(SerdReader *reader, Ref *dest, Ref *datatype, bool *ate_dot)
Definition n3.c:831
static bool is_PN_CHARS_BASE(const uint32_t c)
Definition n3.c:448
static SerdStatus read_IRIREF_scheme(SerdReader *reader, Ref dest)
Definition n3.c:681
static SerdStatus read_base(SerdReader *reader, bool sparql, bool token)
Definition n3.c:1419
static SerdStatus read_verb(SerdReader *reader, Ref *dest)
Definition n3.c:946
static SerdStatus read_triples(SerdReader *reader, ReadContext ctx, bool *ate_dot)
Definition n3.c:1399
static SerdStatus read_PN_LOCAL_ESC(SerdReader *reader, Ref dest)
Definition n3.c:523
static SerdStatus read_PN_PREFIX_tail(SerdReader *reader, Ref dest)
Definition n3.c:625
static void skip_until(SerdReader *reader, uint8_t byte)
Definition n3.c:1643
static SerdStatus read_predicateObjectList(SerdReader *reader, ReadContext ctx, bool *ate_dot)
Definition n3.c:1232
static bool eat_delim(SerdReader *reader, const char delim)
Definition n3.c:320
SerdStatus read_turtleTrigDoc(SerdReader *reader)
Definition n3.c:1651
static SerdStatus read_BLANK_NODE_LABEL(SerdReader *reader, Ref *dest, bool *ate_dot)
Definition n3.c:978
static bool peek_delim(SerdReader *reader, const char delim)
Definition n3.c:313
static SerdStatus read_prefixID(SerdReader *reader, bool sparql, bool token)
Definition n3.c:1448
static SerdStatus read_objectList(SerdReader *reader, ReadContext ctx, bool *ate_dot)
Definition n3.c:1215
static SerdStatus read_0_9(SerdReader *reader, Ref str, bool at_least_one)
Definition n3.c:815
static bool fancy_syntax(const SerdReader *reader)
Definition n3.c:41
static SerdStatus read_utf8_code(SerdReader *reader, Ref dest, uint32_t *code, uint8_t c)
Definition n3.c:233
static int tokcmp(SerdReader *reader, Ref ref, const char *tok, size_t n)
Definition n3.c:1552
static SerdStatus read_collection(SerdReader *reader, ReadContext ctx, Ref *dest)
Definition n3.c:1287
static SerdStatus read_directive(SerdReader *reader)
Definition n3.c:1486
static SerdStatus read_iri(SerdReader *reader, Ref *dest, bool *ate_dot)
Definition n3.c:893
SerdStatus read_nquadsDoc(SerdReader *reader)
Definition n3.c:1667
static SerdStatus read_anon(SerdReader *reader, ReadContext ctx, bool subject, Ref *dest)
Definition n3.c:1047
static SerdStatus read_subject(SerdReader *reader, ReadContext ctx, Ref *dest, int *s_type)
Definition n3.c:1348
static SerdStatus read_PN_CHARS(SerdReader *reader, Ref dest)
Definition n3.c:489
static SerdStatus read_STRING_LITERAL_LONG(SerdReader *reader, Ref ref, SerdNodeFlags *flags, uint8_t q)
Definition n3.c:333
static SerdStatus bad_char(SerdReader *reader, const char *fmt, uint8_t c)
Definition n3.c:183
#define XSD_INTEGER
static SerdStatus read_PERCENT(SerdReader *reader, Ref dest)
Definition n3.c:509
static SerdStatus end_collection(SerdReader *reader, ReadContext ctx, Ref n1, Ref n2, SerdStatus st)
Definition n3.c:1269
static SerdStatus read_utf8_bytes(SerdReader *reader, uint8_t bytes[4], uint32_t *size, uint8_t c)
Definition n3.c:196
static Ref read_blankName(SerdReader *reader)
Definition n3.c:1031
#define XSD_DECIMAL
static SerdStatus read_wrappedGraph(SerdReader *reader, ReadContext *ctx)
Definition n3.c:1513
static bool is_PN_CHARS(const uint32_t c)
Definition n3.c:482
static SerdStatus read_PN_PREFIX(SerdReader *reader, Ref dest)
Definition n3.c:645
static SerdStatus read_PrefixedName(SerdReader *reader, Ref dest, bool read_prefix, bool *ate_dot)
Definition n3.c:796
static SerdStatus read_PN_CHARS_BASE(SerdReader *reader, Ref dest)
Definition n3.c:459
static SerdStatus read_utf8_character(SerdReader *reader, Ref dest, uint8_t c)
Definition n3.c:218
static SerdStatus read_STRING_LITERAL(SerdReader *reader, Ref ref, SerdNodeFlags *flags, uint8_t q)
Definition n3.c:374
static SerdStatus read_character(SerdReader *reader, Ref dest, SerdNodeFlags *flags, uint8_t c)
Definition n3.c:251
static SerdStatus read_IRIREF(SerdReader *reader, Ref *dest)
Definition n3.c:711
static SerdStatus read_LANGTAG(SerdReader *reader, Ref *dest)
Definition n3.c:655
static void read_comment(SerdReader *reader)
Definition n3.c:274
SerdStatus read_n3_statement(SerdReader *reader)
Definition n3.c:1563
static SerdStatus read_PLX(SerdReader *reader, Ref dest)
Definition n3.c:559
static SerdStatus read_String(SerdReader *reader, Ref node, SerdNodeFlags *flags)
Definition n3.c:414
static bool read_ws(SerdReader *reader)
Definition n3.c:285
static SerdStatus read_PN_LOCAL(SerdReader *reader, Ref dest, bool *ate_dot)
Definition n3.c:573
#define XSD_DOUBLE
static SerdStatus read_UCHAR(SerdReader *reader, Ref dest, uint32_t *char_code)
Definition n3.c:66
static SerdStatus read_labelOrSubject(SerdReader *reader, Ref *dest)
Definition n3.c:1375
#define TRY(st, exp)
Definition n3.c:33
static SerdStatus read_ECHAR(SerdReader *reader, Ref dest, SerdNodeFlags *flags)
Definition n3.c:146
png_uint_32 length
Definition png.c:2247
static int peek_byte(SerdReader *reader)
Definition reader.h:128
static int eat_byte_safe(SerdReader *reader, const int byte)
Definition reader.h:136
static int eat_byte_check(SerdReader *reader, const int byte)
Definition reader.h:148
static SerdStatus push_byte(SerdReader *reader, Ref ref, const int c)
Definition reader.h:170
static SerdStatus eat_string(SerdReader *reader, const char *str, unsigned n)
Definition reader.h:159
static void push_bytes(SerdReader *reader, Ref ref, const uint8_t *bytes, unsigned len)
Definition reader.h:189
size_t Ref
Definition reader.h:46
static void serd_stack_pop(SerdStack *stack, size_t n_bytes)
Definition stack.h:80
static const uint8_t replacement_char[]
Definition string_utils.h:27
static int serd_strncasecmp(const char *s1, const char *s2, size_t n)
Definition string_utils.h:112
static uint32_t utf8_num_bytes(const uint8_t c)
Definition string_utils.h:124
static bool is_xdigit(const int c)
Definition string_utils.h:59
static uint32_t parse_counted_utf8_char(const uint8_t *utf8, size_t size)
Return the code point of a UTF-8 character with known length.
Definition string_utils.h:147
static bool is_digit(const int c)
Definition string_utils.h:45
static bool is_alpha(const int c)
Definition string_utils.h:38
Definition reader.h:48
Ref object
Definition reader.h:52
SerdStatementFlags * flags
Definition reader.h:55
Ref graph
Definition reader.h:49
Ref datatype
Definition reader.h:53
Ref lang
Definition reader.h:54
Ref subject
Definition reader.h:50
Ref predicate
Definition reader.h:51
bool eof
True iff end of file reached.
Definition byte_source.h:47
A syntactic RDF node.
Definition serd.h:199
const uint8_t *SERD_NULLABLE buf
Value string.
Definition serd.h:200
size_t n_bytes
Size in bytes (excluding null).
Definition serd.h:201
SerdNodeFlags flags
Node flags (string properties).
Definition serd.h:203
SerdType type
Node type.
Definition serd.h:204
Ref rdf_first
Definition reader.h:67
SerdPrefixSink prefix_sink
Definition reader.h:62
Ref rdf_rest
Definition reader.h:68
uint8_t * bprefix
Definition reader.h:76
size_t bprefix_len
Definition reader.h:77
SerdByteSource source
Definition reader.h:71
bool seen_genid
Definition reader.h:79
SerdStack stack
Definition reader.h:72
Ref rdf_nil
Definition reader.h:69
bool strict
True iff strict parsing.
Definition reader.h:78
SerdEndSink end_sink
Definition reader.h:64
SerdBaseSink base_sink
Definition reader.h:61
void * handle
Definition reader.h:59
SerdSyntax syntax
Definition reader.h:73
size_t size
Conceptual size of stack in buf.
Definition stack.h:33
Definition inftrees.h:27
int n
Definition crypt.c:458
return c
Definition crypt.c:175
b
Definition crypt.c:628
ulg size
Definition extract.c:2350
fmt[0]
Definition fileio.c:2503
register uch * q
Definition fileio.c:817
static bool is_uri_scheme_char(const int c)
Definition uri_utils.h:97
_WDL_CSTRING_PREFIX void INT_PTR count
Definition wdlcstring.h:263