-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcoderange.h
202 lines (182 loc) · 7.91 KB
/
coderange.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
#ifndef RUBY_INTERNAL_ENCODING_CODERANGE_H /*-*-C++-*-vi:se ft=cpp:*/
#define RUBY_INTERNAL_ENCODING_CODERANGE_H
/**
* @file
* @author Ruby developers <ruby-core@ruby-lang.org>
* @copyright This file is a part of the programming language Ruby.
* Permission is hereby granted, to either redistribute and/or
* modify this file, provided that the conditions mentioned in the
* file COPYING are met. Consult the file for details.
* @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are
* implementation details. Don't take them as canon. They could
* rapidly appear then vanish. The name (path) of this header file
* is also an implementation detail. Do not expect it to persist
* at the place it is now. Developers are free to move it anywhere
* anytime at will.
* @note To ruby-core: remember that this header can be possibly
* recursively included from extension libraries written in C++.
* Do not expect, for instance, that `__VA_ARGS__` is always available.
* We assume C99 for ruby itself but we don't assume languages of
* extension libraries. They could be written in C++98.
* @brief Routines for code ranges.
*/
#include "ruby/internal/attr/const.h"
#include "ruby/internal/attr/pure.h"
#include "ruby/internal/dllexport.h"
#include "ruby/internal/fl_type.h"
#include "ruby/internal/value.h"
RBIMPL_SYMBOL_EXPORT_BEGIN()
/**
 * Classification of an object's contents with regard to its encoding. This
 * is what rb_enc_str_coderange() returns.
 */
enum ruby_coderange_type {

    /** Nothing has been determined about the object's code range yet. */
    RUBY_ENC_CODERANGE_UNKNOWN = 0,

    /** The object holds values 0 to 127 inclusive and nothing else. */
    RUBY_ENC_CODERANGE_7BIT = ((int)RUBY_FL_USER8),

    /** The object's contents are consistent with its encoding. */
    RUBY_ENC_CODERANGE_VALID = ((int)RUBY_FL_USER9),

    /**
     * The object holds invalid/malformed/broken character(s). Represented by
     * having both the 7BIT bit and the VALID bit on at once.
     */
    RUBY_ENC_CODERANGE_BROKEN = (RUBY_ENC_CODERANGE_7BIT |
                                 RUBY_ENC_CODERANGE_VALID),

    /** Mask selecting the flag bits in which the code range is stored. */
    RUBY_ENC_CODERANGE_MASK = (RUBY_ENC_CODERANGE_7BIT  |
                               RUBY_ENC_CODERANGE_VALID |
                               RUBY_ENC_CODERANGE_BROKEN)
};
RBIMPL_ATTR_CONST()
/**
 * @private
 *
 * Backend of #RB_ENC_CODERANGE_CLEAN_P.  Not meant to be called directly.
 *
 * Shifting `cr` right by one and XOR-ing it with the original leaves the
 * ::RUBY_ENC_CODERANGE_7BIT position set only when that bit and the bit right
 * above it differ.  Assuming ::RUBY_FL_USER9 is the bit immediately above
 * ::RUBY_FL_USER8 (both are defined outside this header), that is the case for
 * ::RUBY_ENC_CODERANGE_7BIT and ::RUBY_ENC_CODERANGE_VALID, and not for
 * ::RUBY_ENC_CODERANGE_UNKNOWN (neither bit) or ::RUBY_ENC_CODERANGE_BROKEN
 * (both bits).
 *
 * @param[in]  cr  An enum ::ruby_coderange_type, passed as an int.
 * @return     Non-zero (the value of ::RUBY_ENC_CODERANGE_7BIT, which is not
 *             necessarily 1) when `cr` is clean; 0 when it isn't.
 */
static inline int
rb_enc_coderange_clean_p(int cr)
{
    const int shifted = cr >> 1;
    const int differing = cr ^ shifted;

    return differing & RUBY_ENC_CODERANGE_7BIT;
}
RBIMPL_ATTR_CONST()
/**
 * Tells whether a code range is "clean", which here means that it is both
 * known and valid.  The actual test is delegated to
 * rb_enc_coderange_clean_p(); its integer result is narrowed to a bool here.
 *
 * @param[in]  cr     An enum ::ruby_coderange_type.
 * @retval     true   `cr` is clean.
 * @retval     false  `cr` isn't.
 */
static inline bool
RB_ENC_CODERANGE_CLEAN_P(enum ruby_coderange_type cr)
{
    const int bits = RBIMPL_CAST((int)cr);

    return rb_enc_coderange_clean_p(bits);
}
RBIMPL_ATTR_PURE_UNLESS_DEBUG()
/**
 * Reads the (inline) code range out of the passed object's flags, by testing
 * them against ::RUBY_ENC_CODERANGE_MASK.  The object must be capable of
 * having inline encoding.  Do not use this without a solid understanding of
 * how objects are laid out at the bit level.
 *
 * @param[in]  obj  Target object.
 * @return     The masked flags, as an enum ::ruby_coderange_type.
 */
static inline enum ruby_coderange_type
RB_ENC_CODERANGE(VALUE obj)
{
    const VALUE masked = RB_FL_TEST_RAW(obj, RUBY_ENC_CODERANGE_MASK);

    return RBIMPL_CAST((enum ruby_coderange_type)masked);
}
RBIMPL_ATTR_PURE_UNLESS_DEBUG()
/**
 * Tells whether the (inline) code range of the passed object is exactly
 * ::RUBY_ENC_CODERANGE_7BIT.  The object must be capable of having inline
 * encoding.  Do not use this without a solid understanding of how objects are
 * laid out at the bit level.
 *
 * @param[in]  obj    Target object.
 * @retval     true   Its code range is ::RUBY_ENC_CODERANGE_7BIT.
 * @retval     false  Anything else, an unknown code range included.
 */
static inline bool
RB_ENC_CODERANGE_ASCIIONLY(VALUE obj)
{
    const enum ruby_coderange_type current = RB_ENC_CODERANGE(obj);

    return current == RUBY_ENC_CODERANGE_7BIT;
}
/**
 * Overwrites the (inline) code range of the passed object with the passed
 * one.  The object must be capable of having inline encoding.  Do not use
 * this without a solid understanding of how objects are laid out at the bit
 * level.
 *
 * @param[out]  obj  Target object, modified in place.
 * @param[in]   cr   An enum ::ruby_coderange_type to store.
 * @post        `obj`'s code range is `cr`.
 */
static inline void
RB_ENC_CODERANGE_SET(VALUE obj, enum ruby_coderange_type cr)
{
    /* Wipe the previously recorded code range first; setting alone could only
     * turn bits on, never off. */
    RB_FL_UNSET_RAW(obj, RUBY_ENC_CODERANGE_MASK);

    /* Then raise the bits of the requested one. */
    RB_FL_SET_RAW(obj, cr);
}
/**
 * Forgets the (inline) code range of the passed object by turning off every
 * bit of ::RUBY_ENC_CODERANGE_MASK in its flags.  The object must be capable
 * of having inline encoding.  Do not use this without a solid understanding
 * of how objects are laid out at the bit level.
 *
 * @param[out]  obj  Target object, modified in place.
 * @post        `obj`'s code range is ::RUBY_ENC_CODERANGE_UNKNOWN.
 */
static inline void
RB_ENC_CODERANGE_CLEAR(VALUE obj)
{
    const VALUE coderange_bits = RUBY_ENC_CODERANGE_MASK;

    RB_FL_UNSET_RAW(obj, coderange_bits);
}
RBIMPL_ATTR_CONST()
/* assumed ASCII compatibility */
/**
 * "Mixes" two code ranges into one.  This is handy for instance when two
 * strings get concatenated: the code range of the result can be derived from
 * those of the operands.  The outcome is decided as follows:
 *
 *   - `a` is ::RUBY_ENC_CODERANGE_7BIT:  the result is `b` as-is.
 *   - `a` is ::RUBY_ENC_CODERANGE_VALID: the result is
 *     ::RUBY_ENC_CODERANGE_VALID when `b` is ::RUBY_ENC_CODERANGE_7BIT, and
 *     `b` as-is otherwise.
 *   - `a` is anything else:              the result is
 *     ::RUBY_ENC_CODERANGE_UNKNOWN regardless of `b`.
 *
 * @param[in]  a  An enum ::ruby_coderange_type.
 * @param[in]  b  Another enum ::ruby_coderange_type.
 * @return     The `a` "and" `b`.
 */
static inline enum ruby_coderange_type
RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
{
    /* A 7bit left-hand side adds no constraint; the other side decides. */
    if (a == RUBY_ENC_CODERANGE_7BIT) {
        return b;
    }

    /* Neither 7bit nor valid: nothing can be said about the mixture. */
    if (a != RUBY_ENC_CODERANGE_VALID) {
        return RUBY_ENC_CODERANGE_UNKNOWN;
    }

    /* `a` is valid here.  A 7bit `b` is promoted to valid; everything else
     * passes through untouched. */
    return (b == RUBY_ENC_CODERANGE_7BIT) ? RUBY_ENC_CODERANGE_VALID : b;
}
/*
 * Legacy spellings that lack the `RUBY_` / `RB_` prefix.  Each macro below
 * expands to the prefixed name given in its `@old` tag, forwarding its
 * arguments (if any) unchanged.  Note that the target of the last one,
 * `RB_ENCODING_CODERANGE_SET`, is not defined in this header.
 */
#define ENC_CODERANGE_MASK RUBY_ENC_CODERANGE_MASK /**< @old{RUBY_ENC_CODERANGE_MASK} */
#define ENC_CODERANGE_UNKNOWN RUBY_ENC_CODERANGE_UNKNOWN /**< @old{RUBY_ENC_CODERANGE_UNKNOWN} */
#define ENC_CODERANGE_7BIT RUBY_ENC_CODERANGE_7BIT /**< @old{RUBY_ENC_CODERANGE_7BIT} */
#define ENC_CODERANGE_VALID RUBY_ENC_CODERANGE_VALID /**< @old{RUBY_ENC_CODERANGE_VALID} */
#define ENC_CODERANGE_BROKEN RUBY_ENC_CODERANGE_BROKEN /**< @old{RUBY_ENC_CODERANGE_BROKEN} */
#define ENC_CODERANGE_CLEAN_P(cr) RB_ENC_CODERANGE_CLEAN_P(cr) /**< @old{RB_ENC_CODERANGE_CLEAN_P} */
#define ENC_CODERANGE(obj) RB_ENC_CODERANGE(obj) /**< @old{RB_ENC_CODERANGE} */
#define ENC_CODERANGE_ASCIIONLY(obj) RB_ENC_CODERANGE_ASCIIONLY(obj) /**< @old{RB_ENC_CODERANGE_ASCIIONLY} */
#define ENC_CODERANGE_SET(obj,cr) RB_ENC_CODERANGE_SET(obj,cr) /**< @old{RB_ENC_CODERANGE_SET} */
#define ENC_CODERANGE_CLEAR(obj) RB_ENC_CODERANGE_CLEAR(obj) /**< @old{RB_ENC_CODERANGE_CLEAR} */
#define ENC_CODERANGE_AND(a, b) RB_ENC_CODERANGE_AND(a, b) /**< @old{RB_ENC_CODERANGE_AND} */
#define ENCODING_CODERANGE_SET(obj, encindex, cr) RB_ENCODING_CODERANGE_SET(obj, encindex, cr) /**< @old{RB_ENCODING_CODERANGE_SET} */
/** @cond INTERNAL_MACRO */
/*
 * Each inline function defined in this header also gets an object-like macro
 * of the same name that expands to itself, so a call still resolves to the
 * function.  Presumably this exists so that `#ifdef` / `defined()` checks can
 * detect these names -- NOTE(review): confirm the intent.
 */
#define RB_ENC_CODERANGE RB_ENC_CODERANGE
#define RB_ENC_CODERANGE_AND RB_ENC_CODERANGE_AND
#define RB_ENC_CODERANGE_ASCIIONLY RB_ENC_CODERANGE_ASCIIONLY
#define RB_ENC_CODERANGE_CLEAN_P RB_ENC_CODERANGE_CLEAN_P
#define RB_ENC_CODERANGE_CLEAR RB_ENC_CODERANGE_CLEAR
#define RB_ENC_CODERANGE_SET RB_ENC_CODERANGE_SET
/** @endcond */
RBIMPL_SYMBOL_EXPORT_END()
#endif /* RUBY_INTERNAL_ENCODING_CODERANGE_H */