Class: ICU::CharsetDetector

Inherits:
Object
  • Object
show all
Defined in:
lib/icu/charset_detector.rb,
ext/icu/icu_charset_detector.c

Instance Method Summary collapse

Constructor Details

#initialize(*args) ⇒ Object



66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'ext/icu/icu_charset_detector.c', line 66

/**
 * Constructor for ICU::CharsetDetector.
 *
 * Opens an ICU charset-detection service and stores it, together with a
 * one-byte empty C string, in the native struct attached to `self`.
 *
 * @param argc  number of Ruby arguments (accepted but not inspected here)
 * @param argv  Ruby arguments (accepted but not inspected here)
 * @param self  the detector instance being initialized
 * @return self
 *
 * Raises through icu_rb_raise_icu_error() when ucsdet_open() reports failure.
 */
VALUE detector_initialize(int argc, VALUE* argv, VALUE self)
{
    GET_DETECTOR(this);
    this->rb_instance = self;

    // Put every owned pointer into a known state before anything below can
    // raise, so the struct never carries an indeterminate pointer.
    this->service = NULL;
    this->dummy_str = NULL;

    // Empty, NUL-terminated placeholder owned by the detector.
    // NOTE(review): presumably what detector_reset_text() re-attaches to the
    // service after a detection run -- confirm against that helper.
    this->dummy_str = ALLOC_N(char, 1);
    this->dummy_str[0] = '\0';

    UErrorCode status = U_ZERO_ERROR;
    this->service = ucsdet_open(&status);
    if (U_FAILURE(status)) {
        icu_rb_raise_icu_error(status);
    }

    return self;
}

Instance Method Details

#detect(str) ⇒ Object

no input text has been provided



105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'ext/icu/icu_charset_detector.c', line 105

/**
 * Detects the most likely charset of `str`.
 *
 * @param self  the detector instance
 * @param str   input to analyse; coerced with StringValue()
 * @return the Ruby object built by detector_populate_match_struct() for the
 *         best match
 *
 * Raises through icu_rb_raise_icu_error() when ucsdet_detect() fails.
 */
VALUE detector_detect(VALUE self, VALUE str)
{
    StringValue(str);
    GET_DETECTOR(this);

    detector_set_text(this, str);

    UErrorCode status = U_ZERO_ERROR;
    const UCharsetMatch* match = ucsdet_detect(this->service, &status);
    if (U_FAILURE(status)) {
        // Detach the caller's text before raising so the error path leaves
        // the detector in the same state as the success path.
        detector_reset_text(this);
        icu_rb_raise_icu_error(status);
    }

    // Convert the match BEFORE resetting the text: per the ICU documentation
    // the match is owned by the detector and is only valid until its input
    // is reset.
    // NOTE(review): ICU documents that ucsdet_detect() may return NULL when
    // no charset matches; NULL handling is left to
    // detector_populate_match_struct() -- confirm it copes.
    VALUE rb_match = detector_populate_match_struct(match);
    detector_reset_text(this);
    return rb_match;
}

#detect_all(str) ⇒ Object



122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# File 'ext/icu/icu_charset_detector.c', line 122

/**
 * Detects every charset that plausibly matches `str`.
 *
 * @param self  the detector instance
 * @param str   input to analyse; coerced with StringValue()
 * @return a Ruby Array holding one detector_populate_match_struct() result
 *         per match, in the order reported by ucsdet_detectAll()
 *
 * Raises through icu_rb_raise_icu_error() when ucsdet_detectAll() fails.
 */
VALUE detector_detect_all(VALUE self, VALUE str)
{
    StringValue(str);
    GET_DETECTOR(this);

    detector_set_text(this, str);

    UErrorCode status = U_ZERO_ERROR;
    int32_t len_matches = 0;
    const UCharsetMatch** matches = ucsdet_detectAll(this->service, &len_matches, &status);
    if (U_FAILURE(status)) {
        // Detach the caller's text before raising so the error path leaves
        // the detector in the same state as the success path.
        detector_reset_text(this);
        icu_rb_raise_icu_error(status);
    }

    // Size the array from the actual match count instead of a fixed guess.
    VALUE result = rb_ary_new2(len_matches);
    for (int32_t i = 0; i < len_matches; ++i) {
        rb_ary_push(result, detector_populate_match_struct(matches[i]));
    }

    // Reset only after all matches are converted: per the ICU documentation
    // they are owned by the detector and are only valid until its input is
    // reset.
    detector_reset_text(this);
    return result;
}

#detectable_charsets ⇒ Object



162
163
164
165
166
167
168
# File 'ext/icu/icu_charset_detector.c', line 162

/**
 * Lists the charsets the underlying ICU detector is able to recognise.
 *
 * Asks ICU for the enumeration of detectable charset names and hands it,
 * together with the resulting status code, to icu_enum_to_rb_ary(), whose
 * return value is returned unchanged.
 */
VALUE detector_detectable_charsets(VALUE self)
{
    UErrorCode err = U_ZERO_ERROR;
    GET_DETECTOR(det);

    UEnumeration* names = ucsdet_getAllDetectableCharsets(det->service, &err);

    // The status is not checked here; it is forwarded to the helper.
    // NOTE(review): 28 looks like an initial-capacity hint -- confirm against
    // icu_enum_to_rb_ary().
    VALUE list = icu_enum_to_rb_ary(names, err, 28);
    return list;
}

#input_filter ⇒ Object Also known as: input_filter_enabled?



149
150
151
152
153
# File 'ext/icu/icu_charset_detector.c', line 149

/**
 * Reader for the detector's input-filter setting.
 *
 * Fetches the native struct behind `self` and returns whatever
 * detector_get_input_filter_internal() reports for it.
 */
VALUE detector_get_input_filter(VALUE self)
{
    GET_DETECTOR(det);

    VALUE filter_state = detector_get_input_filter_internal(det);
    return filter_state;
}

#input_filter=(flag) ⇒ Object



155
156
157
158
159
160
# File 'ext/icu/icu_charset_detector.c', line 155

/**
 * Writer for the detector's input-filter setting.
 *
 * @param self  the detector instance
 * @param flag  any Ruby value; interpreted with Ruby truthiness, so only
 *              `false` and `nil` disable the filter
 * @return the setting as read back via detector_get_input_filter_internal()
 */
VALUE detector_set_input_filter(VALUE self, VALUE flag)
{
    GET_DETECTOR(this);

    // RTEST rather than `== Qtrue`: any truthy argument enables the filter,
    // instead of every value other than `true` silently disabling it.
    ucsdet_enableInputFilter(this->service, RTEST(flag) ? TRUE : FALSE);
    return detector_get_input_filter_internal(this);
}