Ruby 2.7.6p219 (2022-04-12 revision c9c2245c0a25176072e02db9254f0e0c84c805cd)
pack.c
Go to the documentation of this file.
1/**********************************************************************
2
3 pack.c -
4
5 $Author$
6 created at: Thu Feb 10 15:17:05 JST 1994
7
8 Copyright (C) 1993-2007 Yukihiro Matsumoto
9
10**********************************************************************/
11
#include "ruby/encoding.h"
#include "internal.h"
#include <sys/types.h>
#include <ctype.h>
#include <errno.h>
#include <float.h>
#include <stdint.h>
#include "builtin.h"
19
20/*
21 * It is intentional that the condition for natstr is HAVE_TRUE_LONG_LONG
22 * instead of HAVE_LONG_LONG or LONG_LONG.
23 * This means q! and Q! means always the standard long long type and
24 * causes ArgumentError for platforms which has no long long type,
25 * even if the platform has an implementation specific 64bit type.
26 * This behavior is consistent with the document of pack/unpack.
27 */
/* Directive letters that may be followed by the native-size modifiers
 * '_' and '!'. */
#ifdef HAVE_TRUE_LONG_LONG
static const char natstr[] = "sSiIlLqQjJ";
#else
static const char natstr[] = "sSiIlLjJ";
#endif
/* Directive letters that may be followed by the explicit byte-order
 * modifiers '<' and '>'. */
static const char endstr[] = "sSiIlLqQjJ";

/* Byte width used for the q/Q directives. */
#ifdef HAVE_TRUE_LONG_LONG
/* It is intentional to use long long instead of LONG_LONG. */
# define NATINT_LEN_Q NATINT_LEN(long long, 8)
#else
# define NATINT_LEN_Q 8
#endif

/* NATINT_PACK is defined when at least one native integer type does not have
 * the fixed width (2/4/8 bytes) of its directive, i.e. when the '_'/'!'
 * modifier can actually change the packed size. */
#if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 || (defined(HAVE_TRUE_LONG_LONG) && SIZEOF_LONG_LONG != 8)
# define NATINT_PACK
#endif
45
46#ifdef DYNAMIC_ENDIAN
47/* for universal binary of NEXTSTEP and MacOS X */
48/* useless since autoconf 2.63? */
/*
 * Detects the host byte order at run time.
 *
 * The result is computed on the first call and cached in a function-local
 * static; later calls return the cached value.
 *
 * Returns 1 on a big-endian host and 0 on a little-endian host.
 */
static int
is_bigendian(void)
{
    static int init = 0;
    static int endian_value;
    char *p;

    if (init) return endian_value;
    init = 1;
    /* `init` now holds 1: if its lowest-addressed byte is non-zero the least
     * significant byte is stored first, so the host is little-endian. */
    p = (char*)&init;
    return endian_value = p[0]?0:1;
}
61# define BIGENDIAN_P() (is_bigendian())
62#elif defined(WORDS_BIGENDIAN)
63# define BIGENDIAN_P() 1
64#else
65# define BIGENDIAN_P() 0
66#endif
67
/*
 * NATINT_LEN(type, len): byte width to use for an integer directive.
 * With NATINT_PACK the result depends on the local `natint` flag in the
 * expanding function: sizeof(type) when it is set, `len` otherwise.
 * Without NATINT_PACK it is always sizeof(type).
 */
#ifdef NATINT_PACK
# define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len))
#else
# define NATINT_LEN(type,len) ((int)sizeof(type))
#endif
73
/*
 * Overlay for a single-precision float: written through `f`, byte-swapped
 * as a 32-bit integer through `u` (see HTONF and friends), and copied
 * to/from packed data through `buf`.
 */
typedef union {
    float f;
    uint32_t u;
    char buf[4];
} FLOAT_SWAPPER;

/* Same overlay for a double-precision float, with a 64-bit integer view. */
typedef union {
    double d;
    uint64_t u;
    char buf[8];
} DOUBLE_SWAPPER;
/* Byte-swap the integer image of a float / double. */
#define swapf(x) swap32(x)
#define swapd(x) swap64(x)

/*
 * Host <-> network (big-endian) and host <-> VAX (little-endian)
 * conversions on the integer image of a float (f) or double (d).
 * Each one swaps only when the host order differs from the target order.
 */
#define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x))
#define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x))
#define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x))
#define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x))
#define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x))
#define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x))
#define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x))
#define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x))

/* Declares a FLOAT_SWAPPER named x.  The expansion already ends in ';'. */
#define FLOAT_CONVWITH(x) FLOAT_SWAPPER x;
/* In-place conversions of a FLOAT_SWAPPER through its `u` member. */
#define HTONF(x) ((x).u = rb_htonf((x).u))
#define HTOVF(x) ((x).u = rb_htovf((x).u))
#define NTOHF(x) ((x).u = rb_ntohf((x).u))
#define VTOHF(x) ((x).u = rb_vtohf((x).u))

/* Declares a DOUBLE_SWAPPER named x.  The expansion already ends in ';'. */
#define DOUBLE_CONVWITH(x) DOUBLE_SWAPPER x;
/* In-place conversions of a DOUBLE_SWAPPER through its `u` member. */
#define HTOND(x) ((x).u = rb_htond((x).u))
#define HTOVD(x) ((x).u = rb_htovd((x).u))
#define NTOHD(x) ((x).u = rb_ntohd((x).u))
#define VTOHD(x) ((x).u = rb_vtohd((x).u))
107
108#define MAX_INTEGER_PACK_SIZE 8
109
110static const char toofew[] = "too few arguments";
111
112static void encodes(VALUE,const char*,long,int,int);
113static void qpencode(VALUE,VALUE,long);
114
115static unsigned long utf8_to_uv(const char*,long*);
116
117static ID id_associated;
118
119static void
120str_associate(VALUE str, VALUE add)
121{
122 /* assert(NIL_P(rb_attr_get(str, id_associated))); */
123 rb_ivar_set(str, id_associated, add);
124}
125
126static VALUE
127str_associated(VALUE str)
128{
129 return rb_ivar_lookup(str, id_associated, Qfalse);
130}
131
132static void
133unknown_directive(const char *mode, char type, VALUE fmt)
134{
135 VALUE f;
136 char unknown[5];
137
138 if (ISPRINT(type)) {
139 unknown[0] = type;
140 unknown[1] = '\0';
141 }
142 else {
143 snprintf(unknown, sizeof(unknown), "\\x%.2x", type & 0xff);
144 }
146 if (f != fmt) {
147 fmt = rb_str_subseq(f, 1, RSTRING_LEN(f) - 2);
148 }
149 rb_warning("unknown %s directive '%s' in '%"PRIsVALUE"'",
150 mode, unknown, fmt);
151}
152
153static float
154VALUE_to_float(VALUE obj)
155{
157 double d = RFLOAT_VALUE(v);
158
159 if (isnan(d)) {
160 return NAN;
161 }
162 else if (d < -FLT_MAX) {
163 return -INFINITY;
164 }
165 else if (d <= FLT_MAX) {
166 return d;
167 }
168 else {
169 return INFINITY;
170 }
171}
172
173static VALUE
174pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
175{
176 static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0";
177 static const char spc10[] = " ";
178 const char *p, *pend;
179 VALUE res, from, associates = 0;
180 char type;
181 long len, idx, plen;
182 const char *ptr;
183 int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
184#ifdef NATINT_PACK
185 int natint; /* native integer */
186#endif
187 int integer_size, bigendian_p;
188
190 p = RSTRING_PTR(fmt);
191 pend = p + RSTRING_LEN(fmt);
192
193 if (NIL_P(buffer)) {
194 res = rb_str_buf_new(0);
195 }
196 else {
197 if (!RB_TYPE_P(buffer, T_STRING))
198 rb_raise(rb_eTypeError, "buffer must be String, not %s", rb_obj_classname(buffer));
199 res = buffer;
200 }
201
202 idx = 0;
203
204#define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
205#define MORE_ITEM (idx < RARRAY_LEN(ary))
206#define THISFROM (MORE_ITEM ? RARRAY_AREF(ary, idx) : TOO_FEW)
207#define NEXTFROM (MORE_ITEM ? RARRAY_AREF(ary, idx++) : TOO_FEW)
208
209 while (p < pend) {
210 int explicit_endian = 0;
211 if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) {
212 rb_raise(rb_eRuntimeError, "format string modified");
213 }
214 type = *p++; /* get data type */
215#ifdef NATINT_PACK
216 natint = 0;
217#endif
218
219 if (ISSPACE(type)) continue;
220 if (type == '#') {
221 while ((p < pend) && (*p != '\n')) {
222 p++;
223 }
224 continue;
225 }
226
227 {
228 modifiers:
229 switch (*p) {
230 case '_':
231 case '!':
232 if (strchr(natstr, type)) {
233#ifdef NATINT_PACK
234 natint = 1;
235#endif
236 p++;
237 }
238 else {
239 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
240 }
241 goto modifiers;
242
243 case '<':
244 case '>':
245 if (!strchr(endstr, type)) {
246 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
247 }
248 if (explicit_endian) {
249 rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
250 }
251 explicit_endian = *p++;
252 goto modifiers;
253 }
254 }
255
256 if (*p == '*') { /* set data length */
257 len = strchr("@Xxu", type) ? 0
258 : strchr("PMm", type) ? 1
259 : RARRAY_LEN(ary) - idx;
260 p++;
261 }
262 else if (ISDIGIT(*p)) {
263 errno = 0;
264 len = STRTOUL(p, (char**)&p, 10);
265 if (errno) {
266 rb_raise(rb_eRangeError, "pack length too big");
267 }
268 }
269 else {
270 len = 1;
271 }
272
273 switch (type) {
274 case 'U':
275 /* if encoding is US-ASCII, upgrade to UTF-8 */
276 if (enc_info == 1) enc_info = 2;
277 break;
278 case 'm': case 'M': case 'u':
279 /* keep US-ASCII (do nothing) */
280 break;
281 default:
282 /* fall back to BINARY */
283 enc_info = 0;
284 break;
285 }
286 switch (type) {
287 case 'A': case 'a': case 'Z':
288 case 'B': case 'b':
289 case 'H': case 'h':
290 from = NEXTFROM;
291 if (NIL_P(from)) {
292 ptr = "";
293 plen = 0;
294 }
295 else {
296 StringValue(from);
297 ptr = RSTRING_PTR(from);
298 plen = RSTRING_LEN(from);
299 }
300
301 if (p[-1] == '*')
302 len = plen;
303
304 switch (type) {
305 case 'a': /* arbitrary binary string (null padded) */
306 case 'A': /* arbitrary binary string (ASCII space padded) */
307 case 'Z': /* null terminated string */
308 if (plen >= len) {
309 rb_str_buf_cat(res, ptr, len);
310 if (p[-1] == '*' && type == 'Z')
311 rb_str_buf_cat(res, nul10, 1);
312 }
313 else {
314 rb_str_buf_cat(res, ptr, plen);
315 len -= plen;
316 while (len >= 10) {
317 rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10);
318 len -= 10;
319 }
320 rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len);
321 }
322 break;
323
324#define castchar(from) (char)((from) & 0xff)
325
326 case 'b': /* bit string (ascending) */
327 {
328 int byte = 0;
329 long i, j = 0;
330
331 if (len > plen) {
332 j = (len - plen + 1)/2;
333 len = plen;
334 }
335 for (i=0; i++ < len; ptr++) {
336 if (*ptr & 1)
337 byte |= 128;
338 if (i & 7)
339 byte >>= 1;
340 else {
341 char c = castchar(byte);
342 rb_str_buf_cat(res, &c, 1);
343 byte = 0;
344 }
345 }
346 if (len & 7) {
347 char c;
348 byte >>= 7 - (len & 7);
349 c = castchar(byte);
350 rb_str_buf_cat(res, &c, 1);
351 }
352 len = j;
353 goto grow;
354 }
355 break;
356
357 case 'B': /* bit string (descending) */
358 {
359 int byte = 0;
360 long i, j = 0;
361
362 if (len > plen) {
363 j = (len - plen + 1)/2;
364 len = plen;
365 }
366 for (i=0; i++ < len; ptr++) {
367 byte |= *ptr & 1;
368 if (i & 7)
369 byte <<= 1;
370 else {
371 char c = castchar(byte);
372 rb_str_buf_cat(res, &c, 1);
373 byte = 0;
374 }
375 }
376 if (len & 7) {
377 char c;
378 byte <<= 7 - (len & 7);
379 c = castchar(byte);
380 rb_str_buf_cat(res, &c, 1);
381 }
382 len = j;
383 goto grow;
384 }
385 break;
386
387 case 'h': /* hex string (low nibble first) */
388 {
389 int byte = 0;
390 long i, j = 0;
391
392 if (len > plen) {
393 j = (len + 1) / 2 - (plen + 1) / 2;
394 len = plen;
395 }
396 for (i=0; i++ < len; ptr++) {
397 if (ISALPHA(*ptr))
398 byte |= (((*ptr & 15) + 9) & 15) << 4;
399 else
400 byte |= (*ptr & 15) << 4;
401 if (i & 1)
402 byte >>= 4;
403 else {
404 char c = castchar(byte);
405 rb_str_buf_cat(res, &c, 1);
406 byte = 0;
407 }
408 }
409 if (len & 1) {
410 char c = castchar(byte);
411 rb_str_buf_cat(res, &c, 1);
412 }
413 len = j;
414 goto grow;
415 }
416 break;
417
418 case 'H': /* hex string (high nibble first) */
419 {
420 int byte = 0;
421 long i, j = 0;
422
423 if (len > plen) {
424 j = (len + 1) / 2 - (plen + 1) / 2;
425 len = plen;
426 }
427 for (i=0; i++ < len; ptr++) {
428 if (ISALPHA(*ptr))
429 byte |= ((*ptr & 15) + 9) & 15;
430 else
431 byte |= *ptr & 15;
432 if (i & 1)
433 byte <<= 4;
434 else {
435 char c = castchar(byte);
436 rb_str_buf_cat(res, &c, 1);
437 byte = 0;
438 }
439 }
440 if (len & 1) {
441 char c = castchar(byte);
442 rb_str_buf_cat(res, &c, 1);
443 }
444 len = j;
445 goto grow;
446 }
447 break;
448 }
449 break;
450
451 case 'c': /* signed char */
452 case 'C': /* unsigned char */
453 integer_size = 1;
454 bigendian_p = BIGENDIAN_P(); /* not effective */
455 goto pack_integer;
456
457 case 's': /* s for int16_t, s! for signed short */
458 integer_size = NATINT_LEN(short, 2);
459 bigendian_p = BIGENDIAN_P();
460 goto pack_integer;
461
462 case 'S': /* S for uint16_t, S! for unsigned short */
463 integer_size = NATINT_LEN(short, 2);
464 bigendian_p = BIGENDIAN_P();
465 goto pack_integer;
466
467 case 'i': /* i and i! for signed int */
468 integer_size = (int)sizeof(int);
469 bigendian_p = BIGENDIAN_P();
470 goto pack_integer;
471
472 case 'I': /* I and I! for unsigned int */
473 integer_size = (int)sizeof(int);
474 bigendian_p = BIGENDIAN_P();
475 goto pack_integer;
476
477 case 'l': /* l for int32_t, l! for signed long */
478 integer_size = NATINT_LEN(long, 4);
479 bigendian_p = BIGENDIAN_P();
480 goto pack_integer;
481
482 case 'L': /* L for uint32_t, L! for unsigned long */
483 integer_size = NATINT_LEN(long, 4);
484 bigendian_p = BIGENDIAN_P();
485 goto pack_integer;
486
487 case 'q': /* q for int64_t, q! for signed long long */
488 integer_size = NATINT_LEN_Q;
489 bigendian_p = BIGENDIAN_P();
490 goto pack_integer;
491
492 case 'Q': /* Q for uint64_t, Q! for unsigned long long */
493 integer_size = NATINT_LEN_Q;
494 bigendian_p = BIGENDIAN_P();
495 goto pack_integer;
496
497 case 'j': /* j for intptr_t */
498 integer_size = sizeof(intptr_t);
499 bigendian_p = BIGENDIAN_P();
500 goto pack_integer;
501
502 case 'J': /* J for uintptr_t */
503 integer_size = sizeof(uintptr_t);
504 bigendian_p = BIGENDIAN_P();
505 goto pack_integer;
506
507 case 'n': /* 16 bit (2 bytes) integer (network byte-order) */
508 integer_size = 2;
509 bigendian_p = 1;
510 goto pack_integer;
511
512 case 'N': /* 32 bit (4 bytes) integer (network byte-order) */
513 integer_size = 4;
514 bigendian_p = 1;
515 goto pack_integer;
516
517 case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */
518 integer_size = 2;
519 bigendian_p = 0;
520 goto pack_integer;
521
522 case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */
523 integer_size = 4;
524 bigendian_p = 0;
525 goto pack_integer;
526
527 pack_integer:
528 if (explicit_endian) {
529 bigendian_p = explicit_endian == '>';
530 }
531 if (integer_size > MAX_INTEGER_PACK_SIZE)
532 rb_bug("unexpected intger size for pack: %d", integer_size);
533 while (len-- > 0) {
534 char intbuf[MAX_INTEGER_PACK_SIZE];
535
536 from = NEXTFROM;
537 rb_integer_pack(from, intbuf, integer_size, 1, 0,
540 rb_str_buf_cat(res, intbuf, integer_size);
541 }
542 break;
543
544 case 'f': /* single precision float in native format */
545 case 'F': /* ditto */
546 while (len-- > 0) {
547 float f;
548
549 from = NEXTFROM;
550 f = VALUE_to_float(from);
551 rb_str_buf_cat(res, (char*)&f, sizeof(float));
552 }
553 break;
554
555 case 'e': /* single precision float in VAX byte-order */
556 while (len-- > 0) {
557 FLOAT_CONVWITH(tmp);
558
559 from = NEXTFROM;
560 tmp.f = VALUE_to_float(from);
561 HTOVF(tmp);
562 rb_str_buf_cat(res, tmp.buf, sizeof(float));
563 }
564 break;
565
566 case 'E': /* double precision float in VAX byte-order */
567 while (len-- > 0) {
568 DOUBLE_CONVWITH(tmp);
569 from = NEXTFROM;
570 tmp.d = RFLOAT_VALUE(rb_to_float(from));
571 HTOVD(tmp);
572 rb_str_buf_cat(res, tmp.buf, sizeof(double));
573 }
574 break;
575
576 case 'd': /* double precision float in native format */
577 case 'D': /* ditto */
578 while (len-- > 0) {
579 double d;
580
581 from = NEXTFROM;
582 d = RFLOAT_VALUE(rb_to_float(from));
583 rb_str_buf_cat(res, (char*)&d, sizeof(double));
584 }
585 break;
586
587 case 'g': /* single precision float in network byte-order */
588 while (len-- > 0) {
589 FLOAT_CONVWITH(tmp);
590 from = NEXTFROM;
591 tmp.f = VALUE_to_float(from);
592 HTONF(tmp);
593 rb_str_buf_cat(res, tmp.buf, sizeof(float));
594 }
595 break;
596
597 case 'G': /* double precision float in network byte-order */
598 while (len-- > 0) {
599 DOUBLE_CONVWITH(tmp);
600
601 from = NEXTFROM;
602 tmp.d = RFLOAT_VALUE(rb_to_float(from));
603 HTOND(tmp);
604 rb_str_buf_cat(res, tmp.buf, sizeof(double));
605 }
606 break;
607
608 case 'x': /* null byte */
609 grow:
610 while (len >= 10) {
611 rb_str_buf_cat(res, nul10, 10);
612 len -= 10;
613 }
614 rb_str_buf_cat(res, nul10, len);
615 break;
616
617 case 'X': /* back up byte */
618 shrink:
619 plen = RSTRING_LEN(res);
620 if (plen < len)
621 rb_raise(rb_eArgError, "X outside of string");
622 rb_str_set_len(res, plen - len);
623 break;
624
625 case '@': /* null fill to absolute position */
626 len -= RSTRING_LEN(res);
627 if (len > 0) goto grow;
628 len = -len;
629 if (len > 0) goto shrink;
630 break;
631
632 case '%':
633 rb_raise(rb_eArgError, "%% is not supported");
634 break;
635
636 case 'U': /* Unicode character */
637 while (len-- > 0) {
638 SIGNED_VALUE l;
639 char buf[8];
640 int le;
641
642 from = NEXTFROM;
643 from = rb_to_int(from);
644 l = NUM2LONG(from);
645 if (l < 0) {
646 rb_raise(rb_eRangeError, "pack(U): value out of range");
647 }
648 le = rb_uv_to_utf8(buf, l);
649 rb_str_buf_cat(res, (char*)buf, le);
650 }
651 break;
652
653 case 'u': /* uuencoded string */
654 case 'm': /* base64 encoded string */
655 from = NEXTFROM;
656 StringValue(from);
657 ptr = RSTRING_PTR(from);
658 plen = RSTRING_LEN(from);
659
660 if (len == 0 && type == 'm') {
661 encodes(res, ptr, plen, type, 0);
662 ptr += plen;
663 break;
664 }
665 if (len <= 2)
666 len = 45;
667 else if (len > 63 && type == 'u')
668 len = 63;
669 else
670 len = len / 3 * 3;
671 while (plen > 0) {
672 long todo;
673
674 if (plen > len)
675 todo = len;
676 else
677 todo = plen;
678 encodes(res, ptr, todo, type, 1);
679 plen -= todo;
680 ptr += todo;
681 }
682 break;
683
684 case 'M': /* quoted-printable encoded string */
686 if (len <= 1)
687 len = 72;
688 qpencode(res, from, len);
689 break;
690
691 case 'P': /* pointer to packed byte string */
692 from = THISFROM;
693 if (!NIL_P(from)) {
694 StringValue(from);
695 if (RSTRING_LEN(from) < len) {
696 rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
697 RSTRING_LEN(from), len);
698 }
699 }
700 len = 1;
701 /* FALL THROUGH */
702 case 'p': /* pointer to string */
703 while (len-- > 0) {
704 char *t;
705 from = NEXTFROM;
706 if (NIL_P(from)) {
707 t = 0;
708 }
709 else {
710 t = StringValuePtr(from);
711 }
712 if (!associates) {
713 associates = rb_ary_new();
714 }
715 rb_ary_push(associates, from);
716 rb_str_buf_cat(res, (char*)&t, sizeof(char*));
717 }
718 break;
719
720 case 'w': /* BER compressed integer */
721 while (len-- > 0) {
722 VALUE buf = rb_str_new(0, 0);
723 size_t numbytes;
724 int sign;
725 char *cp;
726
727 from = NEXTFROM;
728 from = rb_to_int(from);
729 numbytes = rb_absint_numwords(from, 7, NULL);
730 if (numbytes == 0)
731 numbytes = 1;
732 buf = rb_str_new(NULL, numbytes);
733
735
736 if (sign < 0)
737 rb_raise(rb_eArgError, "can't compress negative numbers");
738 if (sign == 2)
739 rb_bug("buffer size problem?");
740
741 cp = RSTRING_PTR(buf);
742 while (1 < numbytes) {
743 *cp |= 0x80;
744 cp++;
745 numbytes--;
746 }
747
749 }
750 break;
751
752 default: {
753 unknown_directive("pack", type, fmt);
754 break;
755 }
756 }
757 }
758
759 if (associates) {
760 str_associate(res, associates);
761 }
762 switch (enc_info) {
763 case 1:
765 break;
766 case 2:
768 break;
769 default:
770 /* do nothing, keep ASCII-8BIT */
771 break;
772 }
773 return res;
774}
775
/* 64-entry alphabet used by encodes() for the 'u' directive. */
static const char uu_table[] =
"`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
/* 64-entry alphabet used by encodes() for base64 output. */
static const char b64_table[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
780
781static void
782encodes(VALUE str, const char *s0, long len, int type, int tail_lf)
783{
784 enum {buff_size = 4096, encoded_unit = 4, input_unit = 3};
785 char buff[buff_size + 1]; /* +1 for tail_lf */
786 long i = 0;
787 const char *const trans = type == 'u' ? uu_table : b64_table;
788 char padding;
789 const unsigned char *s = (const unsigned char *)s0;
790
791 if (type == 'u') {
792 buff[i++] = (char)len + ' ';
793 padding = '`';
794 }
795 else {
796 padding = '=';
797 }
798 while (len >= input_unit) {
799 while (len >= input_unit && buff_size-i >= encoded_unit) {
800 buff[i++] = trans[077 & (*s >> 2)];
801 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
802 buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))];
803 buff[i++] = trans[077 & s[2]];
804 s += input_unit;
805 len -= input_unit;
806 }
807 if (buff_size-i < encoded_unit) {
808 rb_str_buf_cat(str, buff, i);
809 i = 0;
810 }
811 }
812
813 if (len == 2) {
814 buff[i++] = trans[077 & (*s >> 2)];
815 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
816 buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))];
817 buff[i++] = padding;
818 }
819 else if (len == 1) {
820 buff[i++] = trans[077 & (*s >> 2)];
821 buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))];
822 buff[i++] = padding;
823 buff[i++] = padding;
824 }
825 if (tail_lf) buff[i++] = '\n';
826 rb_str_buf_cat(str, buff, i);
827 if ((size_t)i > sizeof(buff)) rb_bug("encodes() buffer overrun");
828}
829
830static const char hex_table[] = "0123456789ABCDEF";
831
832static void
833qpencode(VALUE str, VALUE from, long len)
834{
835 char buff[1024];
836 long i = 0, n = 0, prev = EOF;
837 unsigned char *s = (unsigned char*)RSTRING_PTR(from);
838 unsigned char *send = s + RSTRING_LEN(from);
839
840 while (s < send) {
841 if ((*s > 126) ||
842 (*s < 32 && *s != '\n' && *s != '\t') ||
843 (*s == '=')) {
844 buff[i++] = '=';
845 buff[i++] = hex_table[*s >> 4];
846 buff[i++] = hex_table[*s & 0x0f];
847 n += 3;
848 prev = EOF;
849 }
850 else if (*s == '\n') {
851 if (prev == ' ' || prev == '\t') {
852 buff[i++] = '=';
853 buff[i++] = *s;
854 }
855 buff[i++] = *s;
856 n = 0;
857 prev = *s;
858 }
859 else {
860 buff[i++] = *s;
861 n++;
862 prev = *s;
863 }
864 if (n > len) {
865 buff[i++] = '=';
866 buff[i++] = '\n';
867 n = 0;
868 prev = '\n';
869 }
870 if (i > 1024 - 5) {
871 rb_str_buf_cat(str, buff, i);
872 i = 0;
873 }
874 s++;
875 }
876 if (n > 0) {
877 buff[i++] = '=';
878 buff[i++] = '\n';
879 }
880 if (i > 0) {
881 rb_str_buf_cat(str, buff, i);
882 }
883}
884
885static inline int
886hex2num(char c)
887{
888 int n;
889 n = ruby_digit36_to_number_table[(unsigned char)c];
890 if (16 <= n)
891 n = -1;
892 return n;
893}
894
/*
 * Clamps the local `len` to the number of whole `sz`-byte items left
 * between `s` and `send`.  Unless `star` is set, the number of items that
 * were requested but are unavailable is left in `tmp_len`.
 * Expects len, tmp_len, star, s and send in the expanding scope.
 */
#define PACK_LENGTH_ADJUST_SIZE(sz) do {	\
    tmp_len = 0;				\
    if (len > (long)((send-s)/(sz))) {		\
        if (!star) {				\
            tmp_len = len-(send-s)/(sz);	\
        }					\
        len = (send-s)/(sz);			\
    }						\
} while (0)

/*
 * In array mode, extends `ary` by `tmp_len` slots by storing Qnil at the
 * new last index, so that missing items show up as nil.
 */
#define PACK_ITEM_ADJUST() do { \
    if (tmp_len > 0 && mode == UNPACK_ARRAY) \
        rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
} while (0)

/* Workaround for Oracle Developer Studio (Oracle Solaris Studio)
 * 12.4/12.5/12.6 C compiler optimization bug
 * with "-xO4" optimization option.
 */
#if defined(__SUNPRO_C) && 0x5130 <= __SUNPRO_C && __SUNPRO_C <= 0x5150
# define AVOID_CC_BUG volatile
#else
# define AVOID_CC_BUG
#endif

/* unpack mode */
#define UNPACK_ARRAY 0  /* collect items into an array */
#define UNPACK_BLOCK 1  /* yield each item */
#define UNPACK_1 2      /* return the first item */
924
925static VALUE
926pack_unpack_internal(VALUE str, VALUE fmt, int mode)
927{
928#define hexdigits ruby_hexdigits
929 char *s, *send;
930 char *p, *pend;
931 VALUE ary;
932 char type;
933 long len;
934 AVOID_CC_BUG long tmp_len;
935 int star;
936#ifdef NATINT_PACK
937 int natint; /* native integer */
938#endif
939 int signed_p, integer_size, bigendian_p;
940#define UNPACK_PUSH(item) do {\
941 VALUE item_val = (item);\
942 if ((mode) == UNPACK_BLOCK) {\
943 rb_yield(item_val);\
944 }\
945 else if ((mode) == UNPACK_ARRAY) {\
946 rb_ary_push(ary, item_val);\
947 }\
948 else /* if ((mode) == UNPACK_1) { */ {\
949 return item_val; \
950 }\
951 } while (0)
952
955 s = RSTRING_PTR(str);
956 send = s + RSTRING_LEN(str);
957 p = RSTRING_PTR(fmt);
958 pend = p + RSTRING_LEN(fmt);
959
960 ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
961 while (p < pend) {
962 int explicit_endian = 0;
963 type = *p++;
964#ifdef NATINT_PACK
965 natint = 0;
966#endif
967
968 if (ISSPACE(type)) continue;
969 if (type == '#') {
970 while ((p < pend) && (*p != '\n')) {
971 p++;
972 }
973 continue;
974 }
975
976 star = 0;
977 {
978 modifiers:
979 switch (*p) {
980 case '_':
981 case '!':
982
983 if (strchr(natstr, type)) {
984#ifdef NATINT_PACK
985 natint = 1;
986#endif
987 p++;
988 }
989 else {
990 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
991 }
992 goto modifiers;
993
994 case '<':
995 case '>':
996 if (!strchr(endstr, type)) {
997 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
998 }
999 if (explicit_endian) {
1000 rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
1001 }
1002 explicit_endian = *p++;
1003 goto modifiers;
1004 }
1005 }
1006
1007 if (p >= pend)
1008 len = 1;
1009 else if (*p == '*') {
1010 star = 1;
1011 len = send - s;
1012 p++;
1013 }
1014 else if (ISDIGIT(*p)) {
1015 errno = 0;
1016 len = STRTOUL(p, (char**)&p, 10);
1017 if (len < 0 || errno) {
1018 rb_raise(rb_eRangeError, "pack length too big");
1019 }
1020 }
1021 else {
1022 len = (type != '@');
1023 }
1024
1025 switch (type) {
1026 case '%':
1027 rb_raise(rb_eArgError, "%% is not supported");
1028 break;
1029
1030 case 'A':
1031 if (len > send - s) len = send - s;
1032 {
1033 long end = len;
1034 char *t = s + len - 1;
1035
1036 while (t >= s) {
1037 if (*t != ' ' && *t != '\0') break;
1038 t--; len--;
1039 }
1041 s += end;
1042 }
1043 break;
1044
1045 case 'Z':
1046 {
1047 char *t = s;
1048
1049 if (len > send-s) len = send-s;
1050 while (t < s+len && *t) t++;
1051 UNPACK_PUSH(rb_str_new(s, t-s));
1052 if (t < send) t++;
1053 s = star ? t : s+len;
1054 }
1055 break;
1056
1057 case 'a':
1058 if (len > send - s) len = send - s;
1060 s += len;
1061 break;
1062
1063 case 'b':
1064 {
1065 VALUE bitstr;
1066 char *t;
1067 int bits;
1068 long i;
1069
1070 if (p[-1] == '*' || len > (send - s) * 8)
1071 len = (send - s) * 8;
1072 bits = 0;
1073 bitstr = rb_usascii_str_new(0, len);
1074 t = RSTRING_PTR(bitstr);
1075 for (i=0; i<len; i++) {
1076 if (i & 7) bits >>= 1;
1077 else bits = (unsigned char)*s++;
1078 *t++ = (bits & 1) ? '1' : '0';
1079 }
1080 UNPACK_PUSH(bitstr);
1081 }
1082 break;
1083
1084 case 'B':
1085 {
1086 VALUE bitstr;
1087 char *t;
1088 int bits;
1089 long i;
1090
1091 if (p[-1] == '*' || len > (send - s) * 8)
1092 len = (send - s) * 8;
1093 bits = 0;
1094 bitstr = rb_usascii_str_new(0, len);
1095 t = RSTRING_PTR(bitstr);
1096 for (i=0; i<len; i++) {
1097 if (i & 7) bits <<= 1;
1098 else bits = (unsigned char)*s++;
1099 *t++ = (bits & 128) ? '1' : '0';
1100 }
1101 UNPACK_PUSH(bitstr);
1102 }
1103 break;
1104
1105 case 'h':
1106 {
1107 VALUE bitstr;
1108 char *t;
1109 int bits;
1110 long i;
1111
1112 if (p[-1] == '*' || len > (send - s) * 2)
1113 len = (send - s) * 2;
1114 bits = 0;
1115 bitstr = rb_usascii_str_new(0, len);
1116 t = RSTRING_PTR(bitstr);
1117 for (i=0; i<len; i++) {
1118 if (i & 1)
1119 bits >>= 4;
1120 else
1121 bits = (unsigned char)*s++;
1122 *t++ = hexdigits[bits & 15];
1123 }
1124 UNPACK_PUSH(bitstr);
1125 }
1126 break;
1127
1128 case 'H':
1129 {
1130 VALUE bitstr;
1131 char *t;
1132 int bits;
1133 long i;
1134
1135 if (p[-1] == '*' || len > (send - s) * 2)
1136 len = (send - s) * 2;
1137 bits = 0;
1138 bitstr = rb_usascii_str_new(0, len);
1139 t = RSTRING_PTR(bitstr);
1140 for (i=0; i<len; i++) {
1141 if (i & 1)
1142 bits <<= 4;
1143 else
1144 bits = (unsigned char)*s++;
1145 *t++ = hexdigits[(bits >> 4) & 15];
1146 }
1147 UNPACK_PUSH(bitstr);
1148 }
1149 break;
1150
1151 case 'c':
1152 signed_p = 1;
1153 integer_size = 1;
1154 bigendian_p = BIGENDIAN_P(); /* not effective */
1155 goto unpack_integer;
1156
1157 case 'C':
1158 signed_p = 0;
1159 integer_size = 1;
1160 bigendian_p = BIGENDIAN_P(); /* not effective */
1161 goto unpack_integer;
1162
1163 case 's':
1164 signed_p = 1;
1165 integer_size = NATINT_LEN(short, 2);
1166 bigendian_p = BIGENDIAN_P();
1167 goto unpack_integer;
1168
1169 case 'S':
1170 signed_p = 0;
1171 integer_size = NATINT_LEN(short, 2);
1172 bigendian_p = BIGENDIAN_P();
1173 goto unpack_integer;
1174
1175 case 'i':
1176 signed_p = 1;
1177 integer_size = (int)sizeof(int);
1178 bigendian_p = BIGENDIAN_P();
1179 goto unpack_integer;
1180
1181 case 'I':
1182 signed_p = 0;
1183 integer_size = (int)sizeof(int);
1184 bigendian_p = BIGENDIAN_P();
1185 goto unpack_integer;
1186
1187 case 'l':
1188 signed_p = 1;
1189 integer_size = NATINT_LEN(long, 4);
1190 bigendian_p = BIGENDIAN_P();
1191 goto unpack_integer;
1192
1193 case 'L':
1194 signed_p = 0;
1195 integer_size = NATINT_LEN(long, 4);
1196 bigendian_p = BIGENDIAN_P();
1197 goto unpack_integer;
1198
1199 case 'q':
1200 signed_p = 1;
1201 integer_size = NATINT_LEN_Q;
1202 bigendian_p = BIGENDIAN_P();
1203 goto unpack_integer;
1204
1205 case 'Q':
1206 signed_p = 0;
1207 integer_size = NATINT_LEN_Q;
1208 bigendian_p = BIGENDIAN_P();
1209 goto unpack_integer;
1210
1211 case 'j':
1212 signed_p = 1;
1213 integer_size = sizeof(intptr_t);
1214 bigendian_p = BIGENDIAN_P();
1215 goto unpack_integer;
1216
1217 case 'J':
1218 signed_p = 0;
1219 integer_size = sizeof(uintptr_t);
1220 bigendian_p = BIGENDIAN_P();
1221 goto unpack_integer;
1222
1223 case 'n':
1224 signed_p = 0;
1225 integer_size = 2;
1226 bigendian_p = 1;
1227 goto unpack_integer;
1228
1229 case 'N':
1230 signed_p = 0;
1231 integer_size = 4;
1232 bigendian_p = 1;
1233 goto unpack_integer;
1234
1235 case 'v':
1236 signed_p = 0;
1237 integer_size = 2;
1238 bigendian_p = 0;
1239 goto unpack_integer;
1240
1241 case 'V':
1242 signed_p = 0;
1243 integer_size = 4;
1244 bigendian_p = 0;
1245 goto unpack_integer;
1246
1247 unpack_integer:
1248 if (explicit_endian) {
1249 bigendian_p = explicit_endian == '>';
1250 }
1251 PACK_LENGTH_ADJUST_SIZE(integer_size);
1252 while (len-- > 0) {
1253 int flags = bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN;
1254 VALUE val;
1255 if (signed_p)
1256 flags |= INTEGER_PACK_2COMP;
1257 val = rb_integer_unpack(s, integer_size, 1, 0, flags);
1258 UNPACK_PUSH(val);
1259 s += integer_size;
1260 }
1262 break;
1263
1264 case 'f':
1265 case 'F':
1266 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1267 while (len-- > 0) {
1268 float tmp;
1269 memcpy(&tmp, s, sizeof(float));
1270 s += sizeof(float);
1271 UNPACK_PUSH(DBL2NUM((double)tmp));
1272 }
1274 break;
1275
1276 case 'e':
1277 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1278 while (len-- > 0) {
1279 FLOAT_CONVWITH(tmp);
1280 memcpy(tmp.buf, s, sizeof(float));
1281 s += sizeof(float);
1282 VTOHF(tmp);
1283 UNPACK_PUSH(DBL2NUM(tmp.f));
1284 }
1286 break;
1287
1288 case 'E':
1289 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1290 while (len-- > 0) {
1291 DOUBLE_CONVWITH(tmp);
1292 memcpy(tmp.buf, s, sizeof(double));
1293 s += sizeof(double);
1294 VTOHD(tmp);
1295 UNPACK_PUSH(DBL2NUM(tmp.d));
1296 }
1298 break;
1299
1300 case 'D':
1301 case 'd':
1302 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1303 while (len-- > 0) {
1304 double tmp;
1305 memcpy(&tmp, s, sizeof(double));
1306 s += sizeof(double);
1307 UNPACK_PUSH(DBL2NUM(tmp));
1308 }
1310 break;
1311
1312 case 'g':
1313 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1314 while (len-- > 0) {
1315 FLOAT_CONVWITH(tmp);
1316 memcpy(tmp.buf, s, sizeof(float));
1317 s += sizeof(float);
1318 NTOHF(tmp);
1319 UNPACK_PUSH(DBL2NUM(tmp.f));
1320 }
1322 break;
1323
1324 case 'G':
1325 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1326 while (len-- > 0) {
1327 DOUBLE_CONVWITH(tmp);
1328 memcpy(tmp.buf, s, sizeof(double));
1329 s += sizeof(double);
1330 NTOHD(tmp);
1331 UNPACK_PUSH(DBL2NUM(tmp.d));
1332 }
1334 break;
1335
1336 case 'U':
1337 if (len > send - s) len = send - s;
1338 while (len > 0 && s < send) {
1339 long alen = send - s;
1340 unsigned long l;
1341
1342 l = utf8_to_uv(s, &alen);
1343 s += alen; len--;
1345 }
1346 break;
1347
1348 case 'u':
1349 {
1350 VALUE buf = rb_str_new(0, (send - s)*3/4);
1351 char *ptr = RSTRING_PTR(buf);
1352 long total = 0;
1353
1354 while (s < send && (unsigned char)*s > ' ' && (unsigned char)*s < 'a') {
1355 long a,b,c,d;
1356 char hunk[3];
1357
1358 len = ((unsigned char)*s++ - ' ') & 077;
1359
1360 total += len;
1361 if (total > RSTRING_LEN(buf)) {
1362 len -= total - RSTRING_LEN(buf);
1363 total = RSTRING_LEN(buf);
1364 }
1365
1366 while (len > 0) {
1367 long mlen = len > 3 ? 3 : len;
1368
1369 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1370 a = ((unsigned char)*s++ - ' ') & 077;
1371 else
1372 a = 0;
1373 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1374 b = ((unsigned char)*s++ - ' ') & 077;
1375 else
1376 b = 0;
1377 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1378 c = ((unsigned char)*s++ - ' ') & 077;
1379 else
1380 c = 0;
1381 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1382 d = ((unsigned char)*s++ - ' ') & 077;
1383 else
1384 d = 0;
1385 hunk[0] = (char)(a << 2 | b >> 4);
1386 hunk[1] = (char)(b << 4 | c >> 2);
1387 hunk[2] = (char)(c << 6 | d);
1388 memcpy(ptr, hunk, mlen);
1389 ptr += mlen;
1390 len -= mlen;
1391 }
1392 if (s < send && (unsigned char)*s != '\r' && *s != '\n')
1393 s++; /* possible checksum byte */
1394 if (s < send && *s == '\r') s++;
1395 if (s < send && *s == '\n') s++;
1396 }
1397
1398 rb_str_set_len(buf, total);
1400 }
1401 break;
1402
1403 case 'm':
1404 {
1405 VALUE buf = rb_str_new(0, (send - s + 3)*3/4); /* +3 is for skipping paddings */
1406 char *ptr = RSTRING_PTR(buf);
1407 int a = -1,b = -1,c = 0,d = 0;
1408 static signed char b64_xtable[256];
1409
1410 if (b64_xtable['/'] <= 0) {
1411 int i;
1412
1413 for (i = 0; i < 256; i++) {
1414 b64_xtable[i] = -1;
1415 }
1416 for (i = 0; i < 64; i++) {
1417 b64_xtable[(unsigned char)b64_table[i]] = (char)i;
1418 }
1419 }
1420 if (len == 0) {
1421 while (s < send) {
1422 a = b = c = d = -1;
1423 a = b64_xtable[(unsigned char)*s++];
1424 if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64");
1425 b = b64_xtable[(unsigned char)*s++];
1426 if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64");
1427 if (*s == '=') {
1428 if (s + 2 == send && *(s + 1) == '=') break;
1429 rb_raise(rb_eArgError, "invalid base64");
1430 }
1431 c = b64_xtable[(unsigned char)*s++];
1432 if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64");
1433 if (s + 1 == send && *s == '=') break;
1434 d = b64_xtable[(unsigned char)*s++];
1435 if (d == -1) rb_raise(rb_eArgError, "invalid base64");
1436 *ptr++ = castchar(a << 2 | b >> 4);
1437 *ptr++ = castchar(b << 4 | c >> 2);
1438 *ptr++ = castchar(c << 6 | d);
1439 }
1440 if (c == -1) {
1441 *ptr++ = castchar(a << 2 | b >> 4);
1442 if (b & 0xf) rb_raise(rb_eArgError, "invalid base64");
1443 }
1444 else if (d == -1) {
1445 *ptr++ = castchar(a << 2 | b >> 4);
1446 *ptr++ = castchar(b << 4 | c >> 2);
1447 if (c & 0x3) rb_raise(rb_eArgError, "invalid base64");
1448 }
1449 }
1450 else {
1451 while (s < send) {
1452 a = b = c = d = -1;
1453 while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1454 if (s >= send) break;
1455 s++;
1456 while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1457 if (s >= send) break;
1458 s++;
1459 while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1460 if (*s == '=' || s >= send) break;
1461 s++;
1462 while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1463 if (*s == '=' || s >= send) break;
1464 s++;
1465 *ptr++ = castchar(a << 2 | b >> 4);
1466 *ptr++ = castchar(b << 4 | c >> 2);
1467 *ptr++ = castchar(c << 6 | d);
1468 a = -1;
1469 }
1470 if (a != -1 && b != -1) {
1471 if (c == -1)
1472 *ptr++ = castchar(a << 2 | b >> 4);
1473 else {
1474 *ptr++ = castchar(a << 2 | b >> 4);
1475 *ptr++ = castchar(b << 4 | c >> 2);
1476 }
1477 }
1478 }
1481 }
1482 break;
1483
1484 case 'M':
1485 {
1486 VALUE buf = rb_str_new(0, send - s);
1487 char *ptr = RSTRING_PTR(buf), *ss = s;
1488 int csum = 0;
1489 int c1, c2;
1490
1491 while (s < send) {
1492 if (*s == '=') {
1493 if (++s == send) break;
1494 if (s+1 < send && *s == '\r' && *(s+1) == '\n')
1495 s++;
1496 if (*s != '\n') {
1497 if ((c1 = hex2num(*s)) == -1) break;
1498 if (++s == send) break;
1499 if ((c2 = hex2num(*s)) == -1) break;
1500 csum |= *ptr++ = castchar(c1 << 4 | c2);
1501 }
1502 }
1503 else {
1504 csum |= *ptr++ = *s;
1505 }
1506 s++;
1507 ss = s;
1508 }
1510 rb_str_buf_cat(buf, ss, send-ss);
1514 }
1515 break;
1516
1517 case '@':
1518 if (len > RSTRING_LEN(str))
1519 rb_raise(rb_eArgError, "@ outside of string");
1520 s = RSTRING_PTR(str) + len;
1521 break;
1522
1523 case 'X':
1524 if (len > s - RSTRING_PTR(str))
1525 rb_raise(rb_eArgError, "X outside of string");
1526 s -= len;
1527 break;
1528
1529 case 'x':
1530 if (len > send - s)
1531 rb_raise(rb_eArgError, "x outside of string");
1532 s += len;
1533 break;
1534
1535 case 'P':
1536 if (sizeof(char *) <= (size_t)(send - s)) {
1537 VALUE tmp = Qnil;
1538 char *t;
1539
1540 memcpy(&t, s, sizeof(char *));
1541 s += sizeof(char *);
1542
1543 if (t) {
1544 VALUE a;
1545 const VALUE *p, *pend;
1546
1547 if (!(a = str_associated(str))) {
1548 rb_raise(rb_eArgError, "no associated pointer");
1549 }
1550 p = RARRAY_CONST_PTR(a);
1551 pend = p + RARRAY_LEN(a);
1552 while (p < pend) {
1553 if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
1554 if (len < RSTRING_LEN(*p)) {
1555 tmp = rb_str_new(t, len);
1556 str_associate(tmp, a);
1557 }
1558 else {
1559 tmp = *p;
1560 }
1561 break;
1562 }
1563 p++;
1564 }
1565 if (p == pend) {
1566 rb_raise(rb_eArgError, "non associated pointer");
1567 }
1568 }
1569 UNPACK_PUSH(tmp);
1570 }
1571 break;
1572
1573 case 'p':
1574 if (len > (long)((send - s) / sizeof(char *)))
1575 len = (send - s) / sizeof(char *);
1576 while (len-- > 0) {
1577 if ((size_t)(send - s) < sizeof(char *))
1578 break;
1579 else {
1580 VALUE tmp = Qnil;
1581 char *t;
1582
1583 memcpy(&t, s, sizeof(char *));
1584 s += sizeof(char *);
1585
1586 if (t) {
1587 VALUE a;
1588 const VALUE *p, *pend;
1589
1590 if (!(a = str_associated(str))) {
1591 rb_raise(rb_eArgError, "no associated pointer");
1592 }
1593 p = RARRAY_CONST_PTR(a);
1594 pend = p + RARRAY_LEN(a);
1595 while (p < pend) {
1596 if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
1597 tmp = *p;
1598 break;
1599 }
1600 p++;
1601 }
1602 if (p == pend) {
1603 rb_raise(rb_eArgError, "non associated pointer");
1604 }
1605 }
1606 UNPACK_PUSH(tmp);
1607 }
1608 }
1609 break;
1610
1611 case 'w':
1612 {
1613 char *s0 = s;
1614 while (len > 0 && s < send) {
1615 if (*s & 0x80) {
1616 s++;
1617 }
1618 else {
1619 s++;
1621 len--;
1622 s0 = s;
1623 }
1624 }
1625 }
1626 break;
1627
1628 default:
1629 unknown_directive("unpack", type, fmt);
1630 break;
1631 }
1632 }
1633
1634 return ary;
1635}
1636
1637static VALUE
1638pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt)
1639{
1641 return pack_unpack_internal(str, fmt, mode);
1642}
1643
1644static VALUE
1645pack_unpack1(rb_execution_context_t *ec, VALUE str, VALUE fmt)
1646{
1647 return pack_unpack_internal(str, fmt, UNPACK_1);
1648}
1649
1650int
1651rb_uv_to_utf8(char buf[6], unsigned long uv)
1652{
1653 if (uv <= 0x7f) {
1654 buf[0] = (char)uv;
1655 return 1;
1656 }
1657 if (uv <= 0x7ff) {
1658 buf[0] = castchar(((uv>>6)&0xff)|0xc0);
1659 buf[1] = castchar((uv&0x3f)|0x80);
1660 return 2;
1661 }
1662 if (uv <= 0xffff) {
1663 buf[0] = castchar(((uv>>12)&0xff)|0xe0);
1664 buf[1] = castchar(((uv>>6)&0x3f)|0x80);
1665 buf[2] = castchar((uv&0x3f)|0x80);
1666 return 3;
1667 }
1668 if (uv <= 0x1fffff) {
1669 buf[0] = castchar(((uv>>18)&0xff)|0xf0);
1670 buf[1] = castchar(((uv>>12)&0x3f)|0x80);
1671 buf[2] = castchar(((uv>>6)&0x3f)|0x80);
1672 buf[3] = castchar((uv&0x3f)|0x80);
1673 return 4;
1674 }
1675 if (uv <= 0x3ffffff) {
1676 buf[0] = castchar(((uv>>24)&0xff)|0xf8);
1677 buf[1] = castchar(((uv>>18)&0x3f)|0x80);
1678 buf[2] = castchar(((uv>>12)&0x3f)|0x80);
1679 buf[3] = castchar(((uv>>6)&0x3f)|0x80);
1680 buf[4] = castchar((uv&0x3f)|0x80);
1681 return 5;
1682 }
1683 if (uv <= 0x7fffffff) {
1684 buf[0] = castchar(((uv>>30)&0xff)|0xfc);
1685 buf[1] = castchar(((uv>>24)&0x3f)|0x80);
1686 buf[2] = castchar(((uv>>18)&0x3f)|0x80);
1687 buf[3] = castchar(((uv>>12)&0x3f)|0x80);
1688 buf[4] = castchar(((uv>>6)&0x3f)|0x80);
1689 buf[5] = castchar((uv&0x3f)|0x80);
1690 return 6;
1691 }
1692 rb_raise(rb_eRangeError, "pack(U): value out of range");
1693
1695}
1696
/*
 * utf8_limits[k] is the lower bound that utf8_to_uv() applies to a value
 * decoded from a sequence of k + 1 bytes; anything smaller is rejected
 * there as a "redundant UTF-8 sequence" (an over-long encoding).
 */
static const unsigned long utf8_limits[] = {
    0UL,        /* 1 byte  */
    1UL << 7,   /* 2 bytes: 0x80 */
    1UL << 11,  /* 3 bytes: 0x800 */
    1UL << 16,  /* 4 bytes: 0x10000 */
    1UL << 21,  /* 5 bytes: 0x200000 */
    1UL << 26,  /* 6 bytes: 0x4000000 */
    1UL << 31,  /* 7 bytes: 0x80000000 */
};
1706
1707static unsigned long
1708utf8_to_uv(const char *p, long *lenp)
1709{
1710 int c = *p++ & 0xff;
1711 unsigned long uv = c;
1712 long n;
1713
1714 if (!(uv & 0x80)) {
1715 *lenp = 1;
1716 return uv;
1717 }
1718 if (!(uv & 0x40)) {
1719 *lenp = 1;
1720 rb_raise(rb_eArgError, "malformed UTF-8 character");
1721 }
1722
1723 if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
1724 else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
1725 else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
1726 else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
1727 else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
1728 else {
1729 *lenp = 1;
1730 rb_raise(rb_eArgError, "malformed UTF-8 character");
1731 }
1732 if (n > *lenp) {
1733 rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
1734 n, *lenp);
1735 }
1736 *lenp = n--;
1737 if (n != 0) {
1738 while (n--) {
1739 c = *p++ & 0xff;
1740 if ((c & 0xc0) != 0x80) {
1741 *lenp -= n + 1;
1742 rb_raise(rb_eArgError, "malformed UTF-8 character");
1743 }
1744 else {
1745 c &= 0x3f;
1746 uv = uv << 6 | c;
1747 }
1748 }
1749 }
1750 n = *lenp - 1;
1751 if (uv < utf8_limits[n]) {
1752 rb_raise(rb_eArgError, "redundant UTF-8 sequence");
1753 }
1754 return uv;
1755}
1756
1757#include "pack.rbinc"
1758
1759void
1761{
1762 load_pack();
1763
1764 id_associated = rb_make_internal_id();
1765}
int errno
#define add(x, y)
Definition: date_strftime.c:23
struct RIMemo * ptr
Definition: debug.c:65
int rb_utf8_encindex(void)
Definition: encoding.c:1334
int rb_ascii8bit_encindex(void)
Definition: encoding.c:1322
void rb_enc_set_index(VALUE obj, int idx)
Definition: encoding.c:830
int rb_usascii_encindex(void)
Definition: encoding.c:1346
#define ENC_CODERANGE_7BIT
Definition: encoding.h:104
#define ENC_CODERANGE_VALID
Definition: encoding.h:105
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Definition: encoding.h:113
char str[HTML_ESCAPE_MAX_LEN+1]
Definition: escape.c:18
int rb_block_given_p(void)
Determines if the current method is given a block.
Definition: eval.c:898
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:2671
void rb_bug(const char *fmt,...)
Definition: error.c:636
VALUE rb_eRangeError
Definition: error.c:928
VALUE rb_eTypeError
Definition: error.c:924
VALUE rb_eRuntimeError
Definition: error.c:922
VALUE rb_eArgError
Definition: error.c:925
VALUE rb_to_float(VALUE)
Converts a Numeric object into Float.
Definition: object.c:3542
VALUE rb_to_int(VALUE)
Converts val into Integer.
Definition: object.c:3021
VALUE type(ANYARGS)
ANYARGS-ed function type.
Definition: cxxanyargs.hpp:39
#define FLOAT_CONVWITH(x)
Definition: pack.c:96
#define UNPACK_ARRAY
Definition: pack.c:921
#define hexdigits
#define HTOVF(x)
Definition: pack.c:98
#define VTOHD(x)
Definition: pack.c:106
#define AVOID_CC_BUG
Definition: pack.c:917
#define UNPACK_PUSH(item)
#define DOUBLE_CONVWITH(x)
Definition: pack.c:102
void Init_pack(void)
Definition: pack.c:1760
#define NTOHD(x)
Definition: pack.c:105
#define HTOVD(x)
Definition: pack.c:104
#define NATINT_LEN(type, len)
Definition: pack.c:69
#define HTONF(x)
Definition: pack.c:97
#define VTOHF(x)
Definition: pack.c:100
#define PACK_LENGTH_ADJUST_SIZE(sz)
Definition: pack.c:895
#define UNPACK_1
Definition: pack.c:923
#define BIGENDIAN_P()
Definition: pack.c:65
#define MAX_INTEGER_PACK_SIZE
Definition: pack.c:108
#define NATINT_LEN_Q
Definition: pack.c:39
int rb_uv_to_utf8(char buf[6], unsigned long uv)
Definition: pack.c:1651
#define PACK_ITEM_ADJUST()
Definition: pack.c:905
#define castchar(from)
#define THISFROM
#define NEXTFROM
#define HTOND(x)
Definition: pack.c:103
#define NTOHF(x)
Definition: pack.c:99
#define UNPACK_BLOCK
Definition: pack.c:922
#define RARRAY_LEN(a)
int rb_integer_pack(VALUE val, void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
Definition: bignum.c:3547
__uint32_t uint32_t
#define NULL
use StringValue() instead")))
#define RSTRING_LEN(str)
#define STRTOUL(str, endptr, base)
#define T_STRING
VALUE rb_str_quote_unprintable(VALUE)
Definition: string.c:10714
#define StringValuePtr(v)
const VALUE VALUE obj
#define RSTRING_PTR(str)
int snprintf(char *__restrict__, size_t, const char *__restrict__,...) __attribute__((__format__(__printf__
#define rb_str_new(str, len)
#define NIL_P(v)
#define rb_str_buf_cat
__intptr_t intptr_t
#define DBL2NUM(dbl)
size_t rb_absint_numwords(VALUE val, size_t word_numbits, size_t *nlz_bits_ret)
Definition: bignum.c:3382
VALUE rb_integer_unpack(const void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
Definition: bignum.c:3633
const char size_t n
#define rb_usascii_str_new(str, len)
#define EOF
VALUE rb_ivar_lookup(VALUE obj, ID id, VALUE undef)
Definition: variable.c:1035
void rb_str_set_len(VALUE, long)
Definition: string.c:2692
unsigned long VALUE
VALUE rb_ary_push(VALUE, VALUE)
Definition: array.c:1195
VALUE rb_str_buf_new(long)
Definition: string.c:1315
const signed char ruby_digit36_to_number_table[]
Definition: escape.c:6
uint32_t i
#define char
#define INTEGER_PACK_LITTLE_ENDIAN
#define isnan(__x)
__inline__ const void *__restrict__ size_t len
const VALUE int int int int int int VALUE char * fmt
__uint64_t uint64_t
#define INTEGER_PACK_BIG_ENDIAN
const char * rb_obj_classname(VALUE)
Definition: variable.c:289
#define NAN
#define PRIsVALUE
int VALUE v
VALUE rb_ary_new(void)
Definition: array.c:723
#define INFINITY
#define UNREACHABLE_RETURN(val)
#define INTEGER_PACK_2COMP
char * strchr(const char *, int)
Definition: strchr.c:8
#define RFLOAT_VALUE(v)
VALUE rb_str_subseq(VALUE, long, long)
Definition: string.c:2474
ID rb_make_internal_id(void)
Definition: symbol.c:810
struct rb_call_cache buf
#define ISDIGIT(c)
__uintptr_t uintptr_t
#define Qnil
#define Qfalse
void * memcpy(void *__restrict__, const void *__restrict__, size_t)
#define SIGNED_VALUE
#define ULONG2NUM(x)
#define RB_TYPE_P(obj, type)
VALUE rb_obj_as_string(VALUE)
Definition: string.c:1440
__inline__ int
VALUE rb_ivar_set(VALUE, ID, VALUE)
Definition: variable.c:1300
unsigned long ID
const char *void rb_warning(const char *,...) __attribute__((format(printf
#define ISSPACE(c)
#define NUM2LONG(x)
#define RARRAY_CONST_PTR(a)
#define ISPRINT(c)
#define ISASCII(c)
#define ISALPHA(c)
#define f
#define le(x, y)
Definition: time.c:85
uint64_t u
Definition: pack.c:81
double d
Definition: pack.c:80
uint32_t u
Definition: pack.c:76
float f
Definition: pack.c:75