Module: Converter

Included in:
CastOff::Compiler::Translator
Defined in:
lib/cast_off/compile/namespace/namespace.rb

Defined Under Namespace

Classes: Quote

Instance Method Summary collapse

Instance Method Details

#gen_each_lenptr(var, str, strmax) ⇒ Object

Handles strings even longer than those gen_lenptr can handle



478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
# File 'lib/cast_off/compile/namespace/namespace.rb', line 478

# Emits one gen_lenptr entry per C-string chunk of +str+, walking the string
# line by line, and chains the entries so each one depends on its predecessor.
#
# var::    prefix for the generated entry names
# str::    the string to emit; it is split with String#each_line
# strmax:: chunk limit handed to rstring2cstr for every line
#
# Entry names are "<var>_<line>" when a line fits in one chunk, and
# "<var>_<line>_<chunk>" otherwise (indices are rendered in hex).
#
# Returns whatever Array#each_cons returns for the dependency pass.
def gen_each_lenptr(var, str, strmax)
  generated = []
  str.each_line.each_with_index do |line, lineno|
    chunks = rstring2cstr(line, strmax)
    if chunks.size == 1
      # A single chunk keeps the short, line-only name.
      label = format('%s_%x', var, lineno)
      generated << gen_lenptr(label, *chunks[0])
    else
      chunks.each_with_index do |chunk, chunkno|
        label = format('%s_%x_%x', var, lineno, chunkno)
        generated << gen_lenptr(label, *chunk)
      end
    end
  end
  # Each entry depends on the one generated just before it.
  generated.each_cons(2) { |earlier, later| later.depends(earlier) }
end

#gen_lenptr(var, ptr, len) ⇒ Object

Static allocation of a loooooong string



500
501
502
503
504
505
506
# File 'lib/cast_off/compile/namespace/namespace.rb', line 500

# Builds the namespace entry for one statically allocated `sourcecode_t`.
#
# var:: name handed to $namespace.new
# ptr:: C expression for the string data (emitted verbatim)
# len:: length, emitted as zero-padded hexadecimal
#
# Returns the entry with its declaration and definition filled in.
def gen_lenptr(var, ptr, len)
  entry = $namespace.new(var)
  entry.declaration = "static sourcecode_t #{entry}"
  # The definition is the declaration followed by a { length, pointer } initializer.
  initializer = format('{ %#05x, %s, };', len, ptr)
  entry.definition = format('%s = %s', entry.declaration, initializer)
  entry
end

#robject2csource(obj, namespace, strmax, volatilep = false, name = nil, contentsp = false) ⇒ Object

There are several kinds of literals:

  • Fixnums, as well as true, false, and nil: they are 100% statically computable at compile time. No cache needed.

  • Bignums, Floats, Ranges and Symbols: they are almost static, except for the first time. Suited for caching.

  • Classes: not computable by the compiler, but once a ruby process boots up, they already are.

  • Strings: every time a literal is evaluated, a new string object is created. So a cache won’t work.

  • Regexps: almost the same as Strings, except for /…/o, which can be cached.

  • Arrays and Hashes: they also generate new objects every time, but their contents can happen to be cached.

Cached objects can be “shared” – for instance, multiple occurrences of an identical bignum can and should point to a single address in memory.



281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
# File 'lib/cast_off/compile/namespace/namespace.rb', line 281

# Translates a Ruby literal +obj+ into C source that reconstructs it.
#
# obj::       the literal (Quote, Integer, true/false/nil, Float, Range,
#             Class, String, Encoding, Symbol, Regexp, Array or Hash)
# namespace:: class of the name entries; also used to create new entries
# strmax::    chunk limit handed to rstring2cstr
# volatilep:: when true the entry is declared without `static`
# name::      optional pre-chosen name; most branches only fill it in if nil
# contentsp:: for Encodings, emit the `rb_encoding*` itself rather than the
#             VALUE wrapping it
#
# Returns either a plain String (a C expression that needs no cache, e.g.
# "LONG2FIX(1)" or "Qnil") or a +namespace+ entry whose declaration,
# definition, initialization, expression and dependencies have been set.
#
# Raises TypeError for objects (and classes) it does not know how to emit.
def robject2csource obj, namespace, strmax, volatilep = false, name = nil, contentsp = false
  decl = 'VALUE'
  vdef = 'Qundef'
  init = nil
  deps = Array.new
  expr = nil
  case obj
  when Quote # hack
    name ||= obj.unquote.to_s
  when Integer
    # Fixnum and Bignum are not dispatched on by class: since Ruby 2.4 both
    # are merely aliases of Integer (so `when Fixnum` would swallow bignums)
    # and since 3.2 the constants are gone.  The range is checked instead;
    # 0.size is the byte width of a machine long, and a fixnum has two bits
    # fewer than that.
    fixnum_limit = 1 << (0.size * 8 - 2)
    if obj >= -fixnum_limit && obj < fixnum_limit
      name ||= 'LONG2FIX(%d)' % obj
    else
      # Bignums can  be large  enough to exceed  C's string max.   From this
      # method's usage a  bignum reaching this stage is  sourced from a Ruby
      # source  code's bignum  literals, so  they might  not be  much larger
      # though.
      name ||= namespace.new 'num_' + obj.to_s
      rstr = robject2csource obj.to_s, namespace, strmax, :volatile
      init = sprintf "rb_str2inum(%s, 10)", rstr
      deps << rstr
    end
  when TrueClass, FalseClass, NilClass
    # %p is #inspect, so this yields Qtrue, Qfalse or Qnil.
    name ||= 'Q%p' % obj
  when Float
    name ||= namespace.new 'float_' + obj.to_s
    init = sprintf 'rb_float_new(%s)', obj
  when Range
    from = robject2csource obj.begin, namespace, strmax, :volatile
    to   = robject2csource obj.end, namespace, strmax, :volatile
    xclp = obj.exclude_end? ? 1 : 0
    init = sprintf "rb_range_new(%s, %s, %d)", from, to, xclp
    name ||= namespace.new
    deps << from << to
  when Class
    # From  my  investigation over  the  MRI  implementation, those  three
    # classes  are the  only classes  that  can appear  in an  instruction
    # sequence.  Don't know why though.
    init = if obj == Object           then 'rb_cObject'
           elsif obj == Array         then 'rb_cArray'
           elsif obj == StandardError then 'rb_eStandardError'
           else
             raise TypeError, "unknown literal object #{obj}"
           end
  when String
    #if obj.empty?
    ## Empty strings are lightweight enough, do not need encodings.
    #name ||= 'rb_str_new(0, 0)'
    #else
    # Like I write here and there  Ruby strings can be much longer than
    # C strings can be.  Plus a  Ruby string has its encoding.  So when
    # we reconstruct a Ruby string, we  need a set of C strings plus an
    # encoding object.
    #if obj.ascii_only?
    #name ||= $namespace.new 'str_' + obj
    #aenc = Encoding.find 'US-ASCII'
    #encn = robject2csource aenc, namespace, strmax, :volatile
    #else
    name ||= namespace.new 'str_' + obj.encoding.name + '_' + obj
    encn = robject2csource obj.encoding, namespace, strmax, :volatile, nil, true
    #end
    deps << encn
    argv = rstring2cstr obj, strmax
    # The first chunk creates the string; every later chunk is appended.
    argv.each do |i|
      if init
        x = sprintf ";\nrb_enc_str_buf_cat(%s, %s, %d, %s)",
          name, *i, encn
        init << x
      else
        init = sprintf "rb_enc_str_new(%s, %d, %s)", *i, encn
      end
    end
    if $YARVAOT_DEBUG
      #init << ";\n    /* #{obj} */"
    end
    #end
  when Encoding
    # Thank  goodness, encoding  names are  short and  will  never contain
    # multilingual chars.
    rstr = obj.name
    if contentsp
      decl = 'rb_encoding*'
      vdef = '0'
      init = 'rb_enc_find("%s")' % rstr
      name ||= namespace.new 'enc_' + rstr
    else
      # The VALUE form is derived from the rb_encoding* form.
      encn = robject2csource obj, namespace, strmax, :volatile, nil, true
      deps << encn
      init = 'rb_enc_from_encoding(%s)' % encn
      name ||= namespace.new 'encval_' + rstr
    end
  when Symbol
    str = obj.id2name
    if str.bytesize <= strmax
      # Why a symbol is not cached as  a VALUE?  Well a VALUE in C static
      # variable needs  to be scanned  during GC because VALUEs  can have
      # links against some other objects  in general.  But that's not the
      # case for  Symbols --  they do not  have links internally.   An ID
      # variable needs no  GC because it's clear they  are not related to
      # GC at all.   So a Symbol is more efficient when  stored as an ID,
      # rather than a VALUE.
      a = rstring2cstr str, strmax
      e = robject2csource str.encoding, namespace, strmax, :volatile, nil, true
      # NOTE(review): unlike the other branches this one overrides any
      # caller-supplied name -- confirm that is intended.
      name = namespace.new 'sym_' + obj.to_s
      decl = 'ID'
      vdef = '0'
      init = sprintf 'rb_intern3(%s, %d, %s);', *a[0], e
      expr = 'ID2SYM(%s)' % name.name
      deps << e
    else
      # Longer symbols are much like regexps
      name ||= namespace.new 'sym_' + str
      rstr = robject2csource str, namespace, strmax, :volatile
      init = 'rb_str_intern(%s)' % rstr
      deps << rstr
    end
  when Regexp
    opts = obj.options
    srcs = robject2csource obj.source, namespace, strmax, :volatile
    name ||= namespace.new "reg#{opts}_" + srcs.to_s
    init = sprintf 'rb_reg_new_str(%s, %d)', srcs, opts
    deps << srcs
  when Array
    n = obj.length
    if n == 0
      # zero-length  arrays need  no cache,  because a  creation  of such
      # object is fast enough.
      name ||= 'rb_ary_new2(0)'
      #elsif n == 1
      ## no speedup, but a bit readable output
      #i    = obj.first
      #e    = robject2csource i, namespace, strmax, :volatile
      #j    = as_tr_cpp e.to_s
      #s    = 'a' + j
      #name ||= $namespace.new s
      #init = 'rb_ary_new3(1, %s)' % e
      #deps << e
    elsif n <= 30
      # STDC's max  # of function arguments  are 31, so at  most 30 elems
      # are made at once.
      init = 'rb_ary_new3(%d' % obj.length
      obj.each do |x|
        y = robject2csource x, namespace, strmax, :volatile
        init << ",\n        " << y.to_s
        deps << y
      end
      init << ')'
      # The entry name is derived from the argument list itself.
      s = init.sub(%r/\Arb_ary_new3\(\d+,\s+/, 'a')
      name ||= namespace.new 'ary_' + s
    else
      # Too large to create at once.  Feed little by little.
      name ||= namespace.new
      # dup: the seed is appended to below, so it must not be a frozen literal.
      init = 'rb_ary_new()'.dup
      obj.each do |i|
        j = robject2csource i, namespace, strmax, :volatile
        k = sprintf 'rb_ary_push(%s, %s)', name, j
        init << ";\n    " << k
        deps << j
      end
    end
  when Hash
    # Hashes are not computable in a single expression...
    name ||= namespace.new
    # dup: the seed is appended to below, so it must not be a frozen literal.
    init = "rb_hash_new()".dup
    obj.each_pair do |k, v|
      knam = robject2csource k, namespace, strmax, :volatile
      vnam = robject2csource v, namespace, strmax, :volatile
      aset = sprintf 'rb_hash_aset(%s, %s, %s)', name, knam, vnam
      init << ";\n    " << aset
      deps << knam << vnam
    end
  else
    raise TypeError, "unknown literal object #{obj.inspect}"
  end

  # Branches that only produced an initializer (Class) get a name made from it.
  name ||= namespace.new init
  # Plain String names are bare C expressions and carry no metadata.
  case name when namespace
    static_decl = "static #{decl}"
    if volatilep and name.declaration == static_decl
      # OK? same object, different visibility
    elsif not volatilep and name.declaration == decl
      # OK? same object, different visibility
      name.force_set_decl! static_decl
    else
      name.declaration = volatilep ? decl : static_decl
    end
    name.definition     = "#{name.declaration} #{name.name} = #{vdef};"
    name.initialization = "#{name.name} = #{init};" if init
    name.expression     = expr
    # Only namespace entries are recorded; plain-string operands are skipped.
    deps.each do |i|
      case i when namespace
        name.dependencies.push i
      end
    end
  end
  return name
end

#rstring2cstr(str, strmax, rs = nil) ⇒ Object

Returns a 2-dimensional array [[str, len], [str, len], … ]

This is needed because Ruby’s String#dump is different from C’s.



511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
# File 'lib/cast_off/compile/namespace/namespace.rb', line 511

# Splits +str+ into C string literals, each covering at most +strmax+ bytes.
#
# str::    the Ruby string to convert
# strmax:: maximum number of bytes per returned chunk
# rs::     separator passed to String#each_line; chunks never span two
#          "lines" (nil, the default, treats the whole string as one line)
#
# Returns an array of [literal, byte_count] pairs.  Every byte is written as
# a \x hex escape; a chunk longer than 80 bytes is emitted as several
# adjacent quoted pieces, each on its own indented line.
def rstring2cstr(str, strmax, rs = nil)
  return [[''.inspect, 0]] if str.empty?

  # Cut every line into runs of at most strmax bytes.
  chunks = str.each_line(rs).flat_map do |line|
    line.each_byte.each_slice(strmax).to_a
  end

  chunks.map do |bytes|
    pieces = bytes.each_slice(80).map do |row|
      escaped = row.map { |byte| '\\x%x' % byte }.join
      "\n        \"" + escaped + '"'
    end
    # A lone piece needs no leading newline/indentation.
    pieces.first.strip! if pieces.size == 1
    [pieces.join, bytes.size]
  end
end