@@ -697,6 +697,14 @@ json_lex(JsonLexContext *lex)
697
697
698
698
/*
699
699
* The next token in the input stream is known to be a string; lex it.
700
+ *
701
+ * If lex->strval isn't NULL, fill it with the decoded string.
702
+ * Set lex->token_terminator to the end of the decoded input, and in
703
+ * success cases, transfer its previous value to lex->prev_token_terminator.
704
+ * Return JSON_SUCCESS or an error code.
705
+ *
706
+ * Note: be careful that all error exits advance lex->token_terminator
707
+ * to the point after the character we detected the error on.
700
708
*/
701
709
static inline JsonParseErrorType
702
710
json_lex_string (JsonLexContext * lex )
@@ -705,6 +713,19 @@ json_lex_string(JsonLexContext *lex)
705
713
char * const end = lex -> input + lex -> input_length ;
706
714
int hi_surrogate = -1 ;
707
715
716
+ /* Convenience macros for error exits */
717
+ #define FAIL_AT_CHAR_START (code ) \
718
+ do { \
719
+ lex->token_terminator = s; \
720
+ return code; \
721
+ } while (0)
722
+ #define FAIL_AT_CHAR_END (code ) \
723
+ do { \
724
+ lex->token_terminator = \
725
+ s + pg_encoding_mblen_bounded(lex->input_encoding, s); \
726
+ return code; \
727
+ } while (0)
728
+
708
729
if (lex -> strval != NULL )
709
730
resetStringInfo (lex -> strval );
710
731
@@ -715,21 +736,15 @@ json_lex_string(JsonLexContext *lex)
715
736
s ++ ;
716
737
/* Premature end of the string. */
717
738
if (s >= end )
718
- {
719
- lex -> token_terminator = s ;
720
- return JSON_INVALID_TOKEN ;
721
- }
739
+ FAIL_AT_CHAR_START (JSON_INVALID_TOKEN );
722
740
else if (* s == '"' )
723
741
break ;
724
742
else if (* s == '\\' )
725
743
{
726
744
/* OK, we have an escape character. */
727
745
s ++ ;
728
746
if (s >= end )
729
- {
730
- lex -> token_terminator = s ;
731
- return JSON_INVALID_TOKEN ;
732
- }
747
+ FAIL_AT_CHAR_START (JSON_INVALID_TOKEN );
733
748
else if (* s == 'u' )
734
749
{
735
750
int i ;
@@ -739,21 +754,15 @@ json_lex_string(JsonLexContext *lex)
739
754
{
740
755
s ++ ;
741
756
if (s >= end )
742
- {
743
- lex -> token_terminator = s ;
744
- return JSON_INVALID_TOKEN ;
745
- }
757
+ FAIL_AT_CHAR_START (JSON_INVALID_TOKEN );
746
758
else if (* s >= '0' && * s <= '9' )
747
759
ch = (ch * 16 ) + (* s - '0' );
748
760
else if (* s >= 'a' && * s <= 'f' )
749
761
ch = (ch * 16 ) + (* s - 'a' ) + 10 ;
750
762
else if (* s >= 'A' && * s <= 'F' )
751
763
ch = (ch * 16 ) + (* s - 'A' ) + 10 ;
752
764
else
753
- {
754
- lex -> token_terminator = s + pg_encoding_mblen_bounded (lex -> input_encoding , s );
755
- return JSON_UNICODE_ESCAPE_FORMAT ;
756
- }
765
+ FAIL_AT_CHAR_END (JSON_UNICODE_ESCAPE_FORMAT );
757
766
}
758
767
if (lex -> strval != NULL )
759
768
{
@@ -763,20 +772,20 @@ json_lex_string(JsonLexContext *lex)
763
772
if (is_utf16_surrogate_first (ch ))
764
773
{
765
774
if (hi_surrogate != -1 )
766
- return JSON_UNICODE_HIGH_SURROGATE ;
775
+ FAIL_AT_CHAR_END ( JSON_UNICODE_HIGH_SURROGATE ) ;
767
776
hi_surrogate = ch ;
768
777
continue ;
769
778
}
770
779
else if (is_utf16_surrogate_second (ch ))
771
780
{
772
781
if (hi_surrogate == -1 )
773
- return JSON_UNICODE_LOW_SURROGATE ;
782
+ FAIL_AT_CHAR_END ( JSON_UNICODE_LOW_SURROGATE ) ;
774
783
ch = surrogate_pair_to_codepoint (hi_surrogate , ch );
775
784
hi_surrogate = -1 ;
776
785
}
777
786
778
787
if (hi_surrogate != -1 )
779
- return JSON_UNICODE_LOW_SURROGATE ;
788
+ FAIL_AT_CHAR_END ( JSON_UNICODE_LOW_SURROGATE ) ;
780
789
781
790
/*
782
791
* Reject invalid cases. We can't have a value above
@@ -786,7 +795,7 @@ json_lex_string(JsonLexContext *lex)
786
795
if (ch == 0 )
787
796
{
788
797
/* We can't allow this, since our TEXT type doesn't */
789
- return JSON_UNICODE_CODE_POINT_ZERO ;
798
+ FAIL_AT_CHAR_END ( JSON_UNICODE_CODE_POINT_ZERO ) ;
790
799
}
791
800
792
801
/*
@@ -800,7 +809,7 @@ json_lex_string(JsonLexContext *lex)
800
809
char cbuf [MAX_UNICODE_EQUIVALENT_STRING + 1 ];
801
810
802
811
if (!pg_unicode_to_server_noerror (ch , (unsigned char * ) cbuf ))
803
- return JSON_UNICODE_UNTRANSLATABLE ;
812
+ FAIL_AT_CHAR_END ( JSON_UNICODE_UNTRANSLATABLE ) ;
804
813
appendStringInfoString (lex -> strval , cbuf );
805
814
}
806
815
#else
@@ -820,14 +829,14 @@ json_lex_string(JsonLexContext *lex)
820
829
appendStringInfoChar (lex -> strval , (char ) ch );
821
830
}
822
831
else
823
- return JSON_UNICODE_HIGH_ESCAPE ;
832
+ FAIL_AT_CHAR_END ( JSON_UNICODE_HIGH_ESCAPE ) ;
824
833
#endif /* FRONTEND */
825
834
}
826
835
}
827
836
else if (lex -> strval != NULL )
828
837
{
829
838
if (hi_surrogate != -1 )
830
- return JSON_UNICODE_LOW_SURROGATE ;
839
+ FAIL_AT_CHAR_END ( JSON_UNICODE_LOW_SURROGATE ) ;
831
840
832
841
switch (* s )
833
842
{
@@ -852,10 +861,14 @@ json_lex_string(JsonLexContext *lex)
852
861
appendStringInfoChar (lex -> strval , '\t' );
853
862
break ;
854
863
default :
855
- /* Not a valid string escape, so signal error. */
864
+
865
+ /*
866
+ * Not a valid string escape, so signal error. We
867
+ * adjust token_start so that just the escape sequence
868
+ * is reported, not the whole string.
869
+ */
856
870
lex -> token_start = s ;
857
- lex -> token_terminator = s + pg_encoding_mblen_bounded (lex -> input_encoding , s );
858
- return JSON_ESCAPING_INVALID ;
871
+ FAIL_AT_CHAR_END (JSON_ESCAPING_INVALID );
859
872
}
860
873
}
861
874
else if (strchr ("\"\\/bfnrt" , * s ) == NULL )
@@ -868,16 +881,15 @@ json_lex_string(JsonLexContext *lex)
868
881
* shown it's not a performance win.
869
882
*/
870
883
lex -> token_start = s ;
871
- lex -> token_terminator = s + pg_encoding_mblen_bounded (lex -> input_encoding , s );
872
- return JSON_ESCAPING_INVALID ;
884
+ FAIL_AT_CHAR_END (JSON_ESCAPING_INVALID );
873
885
}
874
886
}
875
887
else
876
888
{
877
889
char * p = s ;
878
890
879
891
if (hi_surrogate != -1 )
880
- return JSON_UNICODE_LOW_SURROGATE ;
892
+ FAIL_AT_CHAR_END ( JSON_UNICODE_LOW_SURROGATE ) ;
881
893
882
894
/*
883
895
* Skip to the first byte that requires special handling, so we
@@ -917,12 +929,18 @@ json_lex_string(JsonLexContext *lex)
917
929
}
918
930
919
931
if (hi_surrogate != -1 )
932
+ {
933
+ lex -> token_terminator = s + 1 ;
920
934
return JSON_UNICODE_LOW_SURROGATE ;
935
+ }
921
936
922
937
/* Hooray, we found the end of the string! */
923
938
lex -> prev_token_terminator = lex -> token_terminator ;
924
939
lex -> token_terminator = s + 1 ;
925
940
return JSON_SUCCESS ;
941
+
942
+ #undef FAIL_AT_CHAR_START
943
+ #undef FAIL_AT_CHAR_END
926
944
}
927
945
928
946
/*
0 commit comments