Class: Ontology

Inherits:
Object
  • Object
show all
Defined in:
lib/semtools/ontology.rb

Constant Summary collapse

@@basic_tags =

FIELDS

Handled class variables

> @@basic_tags :: hash with main OBO structure tags

> @@allowed_calcs :: hash with allowed IC and similarity calculations

> @@symbolizable_ids :: tags which can be symbolized

> @@tags_with_trailing_modifiers :: tags which can include extra info after specific text modifiers

Handled object variables

> @header :: file header (if is available)

> @stanzas :: OBO stanzas :terms,:typedefs,:instances

> @ancestors_index :: hash of ancestors per each term handled with any structure relationships

> @descendants_index :: hash of descendants per each term handled with any structure relationships

> @alternatives_index :: hash of alternative IDs (includes alt_id and obsoletes)

> @obsoletes_index :: hash of obsoletes and their new IDs

> @special_tags :: set of special tags to be expanded (:is_a, :obsolete, :alt_id)

> @structureType :: type of ontology structure depending on ancestors relationship. Allowed: sparse, circular, hierarchical

> @ics :: already calculated ICs for handled terms and IC types

> @meta :: meta_information about handled terms like [ancestors, descendants, struct_freq, observed_freq]

> @max_freqs :: maximum freqs found for structural and observed freqs

> @dicts :: bidirectional dictionaries with three levels <key|value>: 1º) <tag|hash2>; 2º) <(:byTerm/:byValue)|hash3>; 3º) dictionary <k|v>

> @profiles :: set of terms assigned to an ID

> @profilesDict :: set of profile IDs assigned to a term

> @items :: hash with items relations to terms

> @removable_terms :: array of terms to not be considered

> @term_paths :: metainfo about parental paths of each term

{ancestors: [:is_a], obsolete: :is_obsolete, alternative: [:alt_id,:replaced_by,:consider]}
@@allowed_calcs =
{ics: [:resnik, :resnik_observed, :seco, :zhou, :sanchez], sims: [:resnik, :lin, :jiang_conrath]}
@@symbolizable_ids =
[:id, :alt_id, :replaced_by, :consider]
@@tags_with_trailing_modifiers =
[:is_a, :union_of, :disjoint_from, :relationship, :subsetdef, :synonymtypedef, :property_value]
@@multivalue_tags =
[:alt_id, :is_a, :subset, :synonym, :xref, :intersection_of, :union_of, :disjoint_from, :relationship, :replaced_by, :consider, :subsetdef, :synonymtypedef, :property_value, :remark]

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(file: nil, load_file: false, removable_terms: []) ⇒ Ontology

Instantiate an OBO_Handler object

Parameters
file

with info to be loaded (.obo ; .json)

load_file

activate load process automatically (only for .obo)

removable_terms: term to be removed from calcs



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/semtools/ontology.rb', line 57

# Builds an empty ontology container and optionally loads a file into it.
# ===== Parameters
# +file+:: file with info to be loaded; only handed to #load when +load_file+ is truthy
# +load_file+:: when truthy, #load is called with +file+ at the end of construction
# +removable_terms+:: terms forwarded to #add_removable_terms when the list is not empty
def initialize(file: nil, load_file: false, removable_terms: [])
  # Raw ontology content
  @header = nil
  @stanzas = {terms: {}, typedefs: {}, instances: {}}
  @special_tags = @@basic_tags.clone
  # Relationship indexes
  @ancestors_index = {}
  @descendants_index = {}
  @alternatives_index = {}
  @obsoletes_index = {}
  @structureType = nil
  # Calculated information: one empty container per allowed IC type
  @ics = @@allowed_calcs[:ics].each_with_object({}) { |ic_type, container| container[ic_type] = {} }
  @meta = {}
  @max_freqs = {:struct_freq => -1.0, :observed_freq => -1.0, :max_depth => -1.0}
  @dicts = {}
  @term_paths = {}
  # Profiles, items and exclusions
  @profiles = {}
  @profilesDict = {}
  @items = {}
  @removable_terms = []
  # Optional start-up actions
  add_removable_terms(removable_terms) unless removable_terms.empty?
  load(file) if load_file
end

Instance Attribute Details

#alternatives_indexObject

ACCESS CONTROL



2039
2040
2041
# File 'lib/semtools/ontology.rb', line 2039

# Returns @alternatives_index, the hash of alternative IDs.
def alternatives_index
  return @alternatives_index
end

#ancestors_indexObject

ACCESS CONTROL



2039
2040
2041
# File 'lib/semtools/ontology.rb', line 2039

# Returns @ancestors_index, the hash of ancestors per term.
def ancestors_index
  return @ancestors_index
end

#descendants_indexObject

ACCESS CONTROL



2039
2040
2041
# File 'lib/semtools/ontology.rb', line 2039

# Returns @descendants_index, the hash of descendants per term.
def descendants_index
  return @descendants_index
end

#dictsObject

ACCESS CONTROL



2039
2040
2041
# File 'lib/semtools/ontology.rb', line 2039

# Returns @dicts, the container of bidirectional dictionaries.
def dicts
  return @dicts
end

#fileObject

ACCESS CONTROL



2039
2040
2041
# File 'lib/semtools/ontology.rb', line 2039

# Returns the current value of @file.
def file
  return @file
end

#headerObject

ACCESS CONTROL



2039
2040
2041
# File 'lib/semtools/ontology.rb', line 2039

# Returns @header, the file header (when one is available).
def header
  return @header
end

#icsObject

ACCESS CONTROL



2039
2040
2041
# File 'lib/semtools/ontology.rb', line 2039

# Returns @ics, the already calculated ICs per IC type.
def ics
  return @ics
end

#itemsObject

ACCESS CONTROL



2039
2040
2041
# File 'lib/semtools/ontology.rb', line 2039

# Returns @items, the hash with item relations to terms.
def items
  return @items
end

#max_freqsObject

ACCESS CONTROL



2039
2040
2041
# File 'lib/semtools/ontology.rb', line 2039

# Returns @max_freqs, the maximum frequencies found so far.
def max_freqs
  return @max_freqs
end

#metaObject

ACCESS CONTROL



2039
2040
2041
# File 'lib/semtools/ontology.rb', line 2039

# Returns @meta, the meta-information stored per handled term.
def meta
  return @meta
end

#obsoletes_indexObject

ACCESS CONTROL



2039
2040
2041
# File 'lib/semtools/ontology.rb', line 2039

# Returns @obsoletes_index, the hash of obsolete terms and their new IDs.
def obsoletes_index
  return @obsoletes_index
end

#profilesObject

ACCESS CONTROL



2039
2040
2041
# File 'lib/semtools/ontology.rb', line 2039

# Returns @profiles, the sets of terms assigned to each profile ID.
def profiles
  return @profiles
end

#profilesDictObject

ACCESS CONTROL



2039
2040
2041
# File 'lib/semtools/ontology.rb', line 2039

# Returns @profilesDict, the profile IDs assigned to each term.
def profilesDict
  return @profilesDict
end

#removable_termsObject

ACCESS CONTROL



2039
2040
2041
# File 'lib/semtools/ontology.rb', line 2039

# Returns @removable_terms, the array of terms that must not be considered.
def removable_terms
  return @removable_terms
end

#special_tagsObject

ACCESS CONTROL



2039
2040
2041
# File 'lib/semtools/ontology.rb', line 2039

# Returns @special_tags, the set of special tags to be expanded.
def special_tags
  return @special_tags
end

#stanzasObject

ACCESS CONTROL



2039
2040
2041
# File 'lib/semtools/ontology.rb', line 2039

# Returns @stanzas, the OBO stanzas container (:terms, :typedefs, :instances).
def stanzas
  return @stanzas
end

#structureTypeObject

ACCESS CONTROL



2039
2040
2041
# File 'lib/semtools/ontology.rb', line 2039

# Returns @structureType, the detected type of ontology structure.
def structureType
  return @structureType
end

#term_pathsObject

ACCESS CONTROL



2039
2040
2041
# File 'lib/semtools/ontology.rb', line 2039

# Returns @term_paths, the meta-information about parental paths of each term.
def term_paths
  return @term_paths
end

Class Method Details

Expand a (starting) term using a specific tag and return all extended terms in an array, together with the relationship structure observed (hierarchical or circular). If a circular structure is found, the extended array will be a unique vector without the starting term (no loops). Note: we strongly recommend using the get_related_ids_by_tag function instead of calling this one directly.

Parameters
start

term where start to expand

terms

set to be used to expand

target_tag

tag used to expand

eexpansion

already expanded info

split_info_char

special regex used to split info (if it is necessary)

split_info_indx

special index to take splitted info (if it is necessary)

alt_ids

set of alternative IDs

Returns

A vector with the observed structure (string) and the array with extended terms.



100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# File 'lib/semtools/ontology.rb', line 100

# Recursively expands +start_id+ through +target_tag+ and reports the structure found.
# ===== Parameters
# +start_id+:: term where the expansion starts
# +terms+:: hash of term => tags used to expand
# +target_tag+:: tag whose values are followed
# +related_ids+:: hash with already expanded terms; it is updated in place
# +alt_ids+:: hash of alternative IDs; a related ID found here is replaced by the first entry of its value
# ===== Returns
# [:no_term, []] when +start_id+ is not in +terms+, [:source, []] when the term has no
# +target_tag+, otherwise [structure, related terms] with structure :hierarchical or :circular
def self.get_related_ids(start_id, terms, target_tag, related_ids = {}, alt_ids = {})
  # Recover whatever was already accumulated for the starting term
  accumulated = related_ids[start_id].nil? ? [] : related_ids[start_id]
  start_term = terms[start_id]
  return [:no_term,[]] if start_term.nil?
  return [:source,[]] if start_term[target_tag].nil?

  observed = :hierarchical

  # Work over a copy so the stored tag values are not consumed
  pending = start_term[target_tag].clone
  until pending.empty?
    candidate = pending.shift
    candidate = alt_ids[candidate].first if alt_ids.include?(candidate) # NOTE: if you want to persist current ID instead source ID, re-implement this

    # Already collected during this expansion: a repetition is reported as circular
    if accumulated.include?(candidate)
      observed = :circular
      next
    end

    accumulated << candidate
    if related_ids.include?(candidate)
      # Candidate was expanded before: reuse its stored expansion
      accumulated = accumulated | related_ids[candidate]
      if accumulated.include?(start_id) # Circular case
        observed = :circular
        accumulated.delete(candidate)
        accumulated.delete(start_id)
      end
    else
      # Publish the partial result before recursing so nested calls can see it
      related_ids[start_id] = accumulated
      nested_struct, nested_ids = self.get_related_ids(candidate, terms, target_tag, related_ids, alt_ids)
      accumulated = accumulated | nested_ids
      observed = :circular if nested_struct == :circular
      if accumulated.include?(start_id) # Circular case
        observed = :circular
        accumulated.delete(start_id)
      end
    end
  end
  related_ids[start_id] = accumulated

  return observed, accumulated
end

Expand terms using a specific tag and return all extended terms in a hash, together with the relationship structure observed (hierarchical or circular). If a circular structure is found, the extended array will be a unique vector without the starting term (no loops).

Parameters
terms

set to be used to expand

target_tag

tag used to expand

split_info_char

special regex used to split info (if it is necessary)

split_info_indx

special index to take splitted info (if it is necessary)

alt_ids

set of alternative IDs

obsoletes

integer with the number of obsolete IDs. used to calculate structure type.

Returns

A vector with the observed structure (string) and the hash with extended terms



158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# File 'lib/semtools/ontology.rb', line 158

# Expands every term that defines +target_tag+ and classifies the resulting structure.
# ===== Parameters
# +terms+:: hash of term => tags used to expand
# +target_tag+:: tag used to expand
# +alt_ids+:: hash of alternative IDs, forwarded to get_related_ids
# +obsoletes+:: number of obsolete IDs, discounted when checking for the sparse case
# ===== Returns
# [structure, related_ids] where structure is :atomic (nothing expanded), :sparse
# (at least two non-obsolete terms left unexpanded), :circular or :hierarchical
def self.get_related_ids_by_tag(terms:,target_tag:, alt_ids: {}, obsoletes: 0)
  expanded = {}
  cycle_found = false
  terms.each do |term_id, attributes|
    # Terms without the target tag are never expanded on their own
    next if attributes[target_tag].nil?
    observed, _ = self.get_related_ids(term_id, terms, target_tag, expanded, alt_ids)
    cycle_found = true if observed == :circular
  end

  overall = cycle_found ? :circular : :hierarchical
  # Special cases take precedence over what was observed during expansion
  if expanded.length <= 0
    overall = :atomic
  elsif (terms.length - expanded.length - obsoletes) >= 2
    overall = :sparse
  end
  return overall, expanded
end

.info2hash(attributes, split_char = " ! ", selected_field = 0) ⇒ Object

Class method to transform string with <tag : info> into hash structure

Parameters
attributes

array tuples with info to be transformed into hash format

Returns

Attributes stored into hash structure



185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
# File 'lib/semtools/ontology.rb', line 185

# Turns a list of [tag, value] pairs into a hash keyed by symbolized tag.
# ===== Parameters
# +attributes+:: array of [tag, value] tuples; values are left-stripped in place
# +split_char+:: separator used to cut values of tags listed in @@tags_with_trailing_modifiers
# +selected_field+:: index of the piece kept after splitting
# ===== Returns
# Hash with the attributes; tags listed in @@multivalue_tags are stored as arrays
# ===== Raises
# EncodingError when a tag or a value is nil; RuntimeError when a tag that is not
# multivalue appears more than once
def self.info2hash(attributes, split_char = " ! ", selected_field = 0)
  parsed = {}
  # Only TERMS multivalue tags (future add Typedefs and Instance)
  attributes.each do |tag, value|
    raise EncodingError, 'Info element incorrect format' if tag.nil? || value.nil?
    key = tag.lstrip.to_sym
    value.lstrip!
    # Tags with trailing modifiers keep only the selected piece, as a symbol
    if @@tags_with_trailing_modifiers.include?(key)
      value = value.split(split_char)[selected_field].to_sym
    end

    stored = parsed[key]
    if stored.nil?
      # First occurrence of this tag
      parsed[key] = @@multivalue_tags.include?(key) ? [value] : value
    elsif stored.kind_of?(Array)
      # Multivalue tag seen again: accumulate
      stored << value
    else
      raise('Attempt to concatenate plain text with another. The tag is not declared as multivalue. [' + key.to_s + '](' + stored + ')')
    end
  end
  self.symbolize_ids(parsed)
  return parsed
end

.load_obo(file) ⇒ Object

Class method to load an OBO format file (based on OBO 1.4 format). Specially focused on load the Header, the Terms, the Typedefs and the Instances.

Parameters
file

OBO file to be loaded

Returns

Hash with FILE, HEADER and STANZAS info



225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
# File 'lib/semtools/ontology.rb', line 225

# Reads an OBO file and hands each header/stanza chunk to process_entity.
# Lines are split on the first ':' into [tag, value] pairs; a line equal to
# "[Term]", "[Typedef]" or "[Instance]" closes the current chunk and opens a new one.
# The file is read with File.foreach so the handle is closed when reading ends,
# also if process_entity raises.
# ===== Parameters
# +file+:: path of the OBO file to be loaded
# ===== Returns
# Three values: a hash with :file and :name (basename without extension), the header
# as returned by process_entity, and the stanzas hash (:terms, :typedefs, :instances)
# ===== Raises
# RuntimeError when +file+ is nil
def self.load_obo(file) #TODO: Send to obo_parser class
  raise("File is not defined") if file.nil?
  # Data variables
  header = ''
  stanzas = {terms: {}, typedefs: {}, instances: {}}
  # Auxiliar variables
  infoType = 'Header'
  currInfo = []
  stanzas_flags = %w[[Term] [Typedef] [Instance]]
  # Read file (block form: the file is closed automatically)
  File.foreach(file) do |line|
    line.chomp!
    next if line.empty?
    # Check if new instance is found
    if stanzas_flags.include?(line)
      header = self.process_entity(header, infoType, stanzas, currInfo)
      # Update info variables: "[Term]" -> "Term"
      currInfo = []
      infoType = line.delete('[]')
      next
    end
    # Concat info as [tag, value]; value keeps its leading whitespace
    currInfo << line.split(':', 2)
  end
  # Store last loaded info
  header = self.process_entity(header, infoType, stanzas, currInfo) if !currInfo.empty?

  # Prepare to return
  finfo = {:file => file, :name => File.basename(file, File.extname(file))}
  return finfo, header, stanzas
end

.mutate(root, ontology, clone: true, remove_up: true) ⇒ Object

Parameters
root

main term to expand

ontology

to be cutted

clone

if true, given ontology object will not be mutated

remove_up

if true, stores only the given root term and its descendants. If false, only root ancestors will be stored

Returns

An Ontology object with terms after cut the ontology.



312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
# File 'lib/semtools/ontology.rb', line 312

# Cuts an ontology around +root+ and rebuilds its indexes.
# ===== Parameters
# +root+:: main term used to cut
# +ontology+:: ontology to be cut
# +clone+:: if true, the work is done over +ontology.clone+
# +remove_up+:: if true, only +root+ and the terms listed for it in the descendants index are kept;
#               if false, exactly those terms are removed and every other term is kept
# ===== Returns
# The Ontology object after the cut
def self.mutate(root, ontology, clone: true, remove_up: true)
  # NOTE(review): Object#clone is a shallow copy, so containers such as the stanzas hash
  # are still shared with the given ontology -- confirm whether a deep copy is intended here
  ontology = ontology.clone if clone
  # Obtain affected IDs. A new array is built instead of appending to the indexed one:
  # the index is shared with the given ontology after a shallow clone, and a root without
  # an entry in the index (nil) must not raise
  descendants = (ontology.descendants_index[root] || []) + [root] # Store itself to do not remove it
  # Remove unnecesary terms
  ontology.stanzas[:terms] = ontology.stanzas[:terms].select{|id,v| remove_up ? descendants.include?(id) : !descendants.include?(id)}
  # Reset everything that was calculated over the previous term set
  ontology.ics = Hash[@@allowed_calcs[:ics].map{|ictype| [ictype, {}]}]
  ontology.max_freqs = {:struct_freq => -1.0, :observed_freq => -1.0, :max_depth => -1.0}
  ontology.dicts = {}
  ontology.removable_terms = []
  ontology.term_paths = {}
  # Recalculate metadata
  ontology.build_index
  ontology.add_observed_terms_from_profiles
  # Finish
  return ontology
end

.process_entity(header, infoType, stanzas, currInfo) ⇒ Object

Handle OBO loaded info and stores it into correct container and format

Parameters
header

container

infoType

current ontology item type detected

stanzas

container

currInfo

info to be stored

Returns

header newly/already stored



267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
# File 'lib/semtools/ontology.rb', line 267

# Converts the collected [tag, value] pairs with info2hash and files the result.
# ===== Parameters
# +header+:: header stored so far
# +infoType+:: kind of chunk: 'Header', 'Term', 'Typedef' or 'Instance'
# +stanzas+:: container updated in place, keyed by the :id of the parsed info
# +currInfo+:: info to be stored
# ===== Returns
# The parsed info when +infoType+ is 'Header', otherwise +header+ untouched
def self.process_entity(header, infoType, stanzas, currInfo)
  parsed = self.info2hash(currInfo)
  return parsed if infoType.eql?('Header')

  # Container that receives each stanza type; unknown types are not stored
  section = {'Term' => :terms, 'Typedef' => :typedefs, 'Instance' => :instances}[infoType]
  stanzas[section][parsed[:id]] = parsed unless section.nil?
  return header
end

.symbolize_ids(item_hash) ⇒ Object

Symbolize all values in the hash stored under the symbolizable tags

Parameters
item_hash

hash to be checked



290
291
292
293
294
295
296
297
298
299
300
301
# File 'lib/semtools/ontology.rb', line 290

# Converts to symbols, in place, the values stored under each tag of @@symbolizable_ids.
# ===== Parameters
# +item_hash+:: hash to be checked; array values are converted element by element
def self.symbolize_ids(item_hash)
  @@symbolizable_ids.each do |tag|
    current = item_hash[tag]
    next if current.nil? # Tag not present in this item
    if current.kind_of?(Array)
      current.map!(&:to_sym)
    else
      item_hash[tag] = current.to_sym
    end
  end
end

Instance Method Details

#==(other) ⇒ Object

SPECIAL METHODS



1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
# File 'lib/semtools/ontology.rb', line 1990

# Compares this object with +other+ attribute by attribute, stopping at the first difference.
# @descendants_index is not part of the comparison.
# ===== Parameters
# +other+:: object answering the same readers as this one
# ===== Returns
# true when every compared attribute is equal, false otherwise
def ==(other)
  leading = [:header, :stanzas, :ancestors_index, :alternatives_index, :obsoletes_index,
             :structureType, :ics, :meta, :dicts, :profiles, :profilesDict]
  return false unless leading.all? { |attribute| self.public_send(attribute) == other.public_send(attribute) }
  # Every item key of this object must also be present in the other one
  return false unless (self.items.keys - other.items.keys).empty?
  trailing = [:removable_terms, :special_tags, :items, :term_paths, :max_freqs]
  trailing.all? { |attribute| self.public_send(attribute) == other.public_send(attribute) }
end

#add_observed_term(term:, increase: 1.0) ⇒ Object

Increase observed frequency for a specific term

Parameters
term

term which frequency is going to be increased

increase

frequency rate to be increased. Default = 1

Return

true if process ends without errors, false in other cases

Raises:

  • (ArgumentError)


364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
# File 'lib/semtools/ontology.rb', line 364

# Increases the observed frequency stored in @meta for a term.
# ===== Parameters
# +term+:: term whose frequency is going to be increased
# +increase+:: amount added to the observed frequency. Default = 1.0
# ===== Returns
# false when the term is not in @stanzas[:terms] or is listed in @removable_terms, true otherwise
# ===== Raises
# ArgumentError when +term+ is nil
def add_observed_term(term:,increase: 1.0)
  raise ArgumentError, "Term given is NIL" if term.nil?
  accepted = @stanzas[:terms].include?(term) && !@removable_terms.include?(term)
  return false unless accepted

  fresh_counters = lambda { {:ancestors => -1.0,:descendants => -1.0,:struct_freq => 0.0,:observed_freq => 0.0} }
  # A term found in the alternatives index shares the counters hash of its target
  if @alternatives_index.include?(term)
    target = @alternatives_index[term]
    @meta[target] = fresh_counters.call if @meta[target].nil?
    @meta[term] = @meta[target]
  end
  @meta[term] = fresh_counters.call if @meta[term].nil?

  counters = @meta[term]
  # -1 marks a reset frequency: start counting from zero
  counters[:observed_freq] = 0 if counters[:observed_freq] == -1
  counters[:observed_freq] += increase
  # Keep track of the maximum observed frequency
  if @max_freqs[:observed_freq] < counters[:observed_freq]
    @max_freqs[:observed_freq] = counters[:observed_freq]
  end
  return true
end

#add_observed_terms(terms:, increase: 1.0, transform_to_sym: false) ⇒ Object

Increase the arbitrary frequency of a given term set

Parameters
terms

set of terms to be updated

increase

amount to be increased

transform_to_sym

if true, transform observed terms to symbols. Default: false

Return

true if process ends without errors and false in other cases

Raises:

  • (ArgumentError)


392
393
394
395
396
397
398
399
400
401
402
403
# File 'lib/semtools/ontology.rb', line 392

# Increases the observed frequency of every term of a set through add_observed_term.
# ===== Parameters
# +terms+:: array of terms to be updated
# +increase+:: amount to be increased
# +transform_to_sym+:: if true, each term is converted with to_sym before being added. Default: false
# ===== Returns
# Array with the result of add_observed_term for each term, in the given order
# ===== Raises
# ArgumentError when +terms+ is nil or is not an Array
def add_observed_terms(terms:, increase: 1.0, transform_to_sym: false)
  raise ArgumentError, 'Terms array given is NIL' if terms.nil?
  raise ArgumentError, 'Terms given is not an array' unless terms.is_a?(Array)
  terms.map do |id|
    observed = transform_to_sym ? id.to_sym : id
    self.add_observed_term(term: observed, increase: increase)
  end
end

#add_observed_terms_from_profiles(reset: false) ⇒ Object

Includes as “observed_terms” all terms included into stored profiles

Parameters
reset

if true, reset the observed freqs already stored before re-calculating



1393
1394
1395
1396
# File 'lib/semtools/ontology.rb', line 1393

# Adds as observed terms all the terms included in the stored profiles.
# ===== Parameters
# +reset+:: if true, every :observed_freq stored in @meta is set to -1 before adding
def add_observed_terms_from_profiles(reset: false)
  if reset
    @meta.each_value { |freqs| freqs[:observed_freq] = -1 }
  end
  @profiles.each_value do |profile_terms|
    self.add_observed_terms(terms: profile_terms)
  end
end

#add_profile(id, terms, substitute: true) ⇒ Object

Stores a given profile with a specific ID. If the ID is already assigned to a profile, it will be replaced

Parameters
id

assigned to profile

terms

array of terms

substitute

subsstitute flag from check_ids



1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
# File 'lib/semtools/ontology.rb', line 1266

# Stores a profile under an ID, replacing (with a warning) any profile already stored there.
# Terms rejected by check_ids are left out of the stored profile and a warning is emitted.
# ===== Parameters
# +id+:: ID assigned to the profile; Numeric IDs are used as they are, any other ID is converted with to_sym
# +terms+:: array of terms
# +substitute+:: substitute flag forwarded to check_ids
def add_profile(id, terms, substitute: true)
  warn("Profile assigned to ID (#{id}) is going to be replaced") if @profiles.include? id
  accepted, rejected = self.check_ids(terms, substitute: substitute)
  warn('Given terms contains erroneus IDs. These IDs will be removed') unless rejected.empty?
  profile_key = id.is_a?(Numeric) ? id : id.to_sym
  @profiles[profile_key] = accepted
end

#add_removable_terms(terms) ⇒ Object

Include removable terms to current removable terms list

Parameters
terms

terms array to be concatenated



340
341
342
343
# File 'lib/semtools/ontology.rb', line 340

# Appends terms, converted to symbols, to the current removable terms list.
# ===== Parameters
# +terms+:: terms array to be concatenated
def add_removable_terms(terms)
  @removable_terms.concat(terms.map(&:to_sym))
end

#add_removable_terms_from_file(file) ⇒ Object

Include removable terms to current removable terms list loading new terms from a one column plain text file

Parameters
file

to be loaded



350
351
352
353
354
355
# File 'lib/semtools/ontology.rb', line 350

# Appends to the removable terms list the terms read from a one-column plain text file.
# Each non-empty line is converted to a symbol; empty lines are skipped.
# The file is read with File.foreach, so it is closed when reading ends.
# ===== Parameters
# +file+:: path of the file to be loaded
def add_removable_terms_from_file(file)
  # The +file+ parameter is the path to read (the body used to reference an undefined name)
  File.foreach(file) do |line|
    line.chomp!
    next if line.empty?
    @removable_terms << line.to_sym
  end
end

#build_indexObject

Executes basic expansions of tags (alternatives, obsoletes and parentals) with default values

Returns

true if the process ends without errors and false in other cases



511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
# File 'lib/semtools/ontology.rb', line 511

# Runs the basic expansions (alternatives, obsoletes and child-parent relations), passes
# every indexed ID through extract_id, and then calculates frequencies, the :name and
# :synonym dictionaries and the term levels.
def build_index
  self.get_index_alternatives
  self.get_index_obsoletes
  self.get_index_child_parent_relations
  # Single-value indexes: replace each value by its extracted ID and drop nil results
  @alternatives_index.each do |key, target|
    @alternatives_index[key] = self.extract_id(target)
  end
  @alternatives_index.compact!
  @obsoletes_index.each do |key, target|
    @obsoletes_index[key] = self.extract_id(target)
  end
  @obsoletes_index.compact!
  # Multi-value indexes: same treatment for each element of the stored lists
  @ancestors_index.each do |key, related|
    @ancestors_index[key] = related.map { |term| self.extract_id(term) }.compact
  end
  @ancestors_index.compact!
  @descendants_index.each do |key, related|
    @descendants_index[key] = related.map { |term| self.extract_id(term) }.compact
  end
  @descendants_index.compact!
  self.get_index_frequencies
  self.calc_dictionary(:name)
  self.calc_dictionary(:synonym, select_regex: /\"(.*)\"/)
  self.calc_term_levels(calc_paths: true)
end

#calc_ancestors_dictionaryObject

Calculates :is_a dictionary without alternatives substitution



1141
1142
1143
# File 'lib/semtools/ontology.rb', line 1141

# Calculates the :is_a dictionary without alternatives substitution, with
# self-type references and multi-term linkage enabled.
def calc_ancestors_dictionary
  options = {substitute_alternatives: false, self_type_references: true, multiterm: true}
  self.calc_dictionary(:is_a, **options)
end

#calc_dictionary(tag, select_regex: nil, substitute_alternatives: true, store_tag: nil, multiterm: false, self_type_references: false) ⇒ Object

Generate a bidirectional dictionary set using a specific tag and the terms stanzas set. This function stores the calculated dictionary into the @dicts field. This function stores the first value for multivalue tags. This function does not handle synonyms for byValue dictionaries.

Parameters
tag

to be used to calculate dictionary

select_regex

gives a regfex that can be used to modify value to be stored

substitute_alternatives

flag used to indicate if alternatives must, or not, be replaced by it official ID

store_tag

flag used to store dictionary. If nil, mandatory tag given will be used

multiterm

if true, byValue will allows multi-term linkage (array)

self_type_references

if true, program assumes that refrences will be between Ontology terms, and it term IDs will be checked

Return

void. Stores the calculated bidirectional dictionary into the main dictionaries container



1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
# File 'lib/semtools/ontology.rb', line 1032

# Builds a bidirectional dictionary (term => values, value => term/s) from the values
# that each term of @stanzas[:terms] holds for +tag+, and stores it in @dicts.
# When no terms are loaded, a warning is emitted and nothing is stored.
# ===== Parameters
# +tag+:: tag whose values are collected (converted with to_sym)
# +select_regex+:: when given, values are replaced by the first result of String#scan with this regex
# +substitute_alternatives+:: when true, a term present in @alternatives_index is filed under its
#                             alternative target, unless that target is present in @obsoletes_index
# +store_tag+:: key used in @dicts; defaults to +tag+
# +multiterm+:: when true, byValue maps each value to an array of terms instead of a single term
# +self_type_references+:: when true, collected values are passed through extract_id and replaced
#                          by its result when it is not nil
# ===== Returns
# Stores {byTerm: ..., byValue: ...} under @dicts[store_tag]
def calc_dictionary(tag, select_regex: nil, substitute_alternatives: true, store_tag: nil, multiterm: false, self_type_references: false)
  tag = tag.to_sym
  store_tag = tag if store_tag.nil?
  if @stanzas[:terms].empty?
    warn('Terms are not already loaded. Aborting dictionary calc') 
  else
    byTerm = {}
    byValue = {}
    # Calc per term
    @stanzas[:terms].each do |term, tags|
      # Term under which the values of this stanza are going to be filed
      referenceTerm = term
      if @alternatives_index.include?(term) && substitute_alternatives # Special case
        referenceTerm = @alternatives_index[term] if !@obsoletes_index.include?(@alternatives_index[term])
      end
      queryTag = tags[tag]
      if !queryTag.nil?
        # Pre-process
        if !select_regex.nil?
          if queryTag.kind_of?(Array)
            queryTag = queryTag.map{|value| value.scan(select_regex).first}
            queryTag.flatten!
          else
            queryTag = queryTag.scan(select_regex).first
          end
          # NOTE(review): in the non-array branch, scan(...).first is nil when nothing matches,
          # and nil does not respond to compact! -- confirm this case cannot happen
          queryTag.compact!
        end
        if queryTag.kind_of?(Array) # Store
          if !queryTag.empty?
            # Accumulate without duplicates when several stanzas share the reference term
            if byTerm.include?(referenceTerm)
              byTerm[referenceTerm] = (byTerm[referenceTerm] + queryTag).uniq
            else
              byTerm[referenceTerm] = queryTag
            end
            if multiterm
              queryTag.each do |value|
                byValue[value] = [] if byValue[value].nil? 
                byValue[value] << referenceTerm
              end                
            else
              # Single-term mode: the last term processed for a value wins
              queryTag.each{|value| byValue[value] = referenceTerm}
            end
          end
        else
          # Single (non-array) value
          if byTerm.include?(referenceTerm)
            byTerm[referenceTerm] = (byTerm[referenceTerm] + [queryTag]).uniq
          else
            byTerm[referenceTerm] = [queryTag]
          end
          if multiterm
            byValue[queryTag] = [] if byValue[queryTag].nil?
            byValue[queryTag] << referenceTerm
          else
            byValue[queryTag] = referenceTerm
          end
        end
      end
    end
    
    # Check self-references
    if self_type_references
      byTerm.map do |term, references|
        corrected_references = references.map do |t|
          checked = self.extract_id(t)
          if checked.nil?
            # extract_id gave nothing: keep the reference as it was
            t
          else
            # Move the byValue entry to the checked ID when that ID has no entry yet
            byValue[checked] = byValue.delete(t) if checked != t && !byValue.keys.include?(checked) # Update in byValue
            checked
          end
        end
        byTerm[term] = corrected_references.uniq
      end
    end

    # Check order
    # Values declared in the own stanza of the term are placed first, the rest afterwards
    byTerm.map do |term,values|
      if self.exists?(term)
        referenceValue = @stanzas[:terms][term][tag]
        if !referenceValue.nil?
          # Same pre-processing that was applied when collecting the values
          if !select_regex.nil?
            if referenceValue.kind_of?(Array)
              referenceValue = referenceValue.map{|value| value.scan(select_regex).first}
              referenceValue.flatten!
            else
              referenceValue = referenceValue.scan(select_regex).first
            end
            referenceValue.compact!
          end
          if self_type_references
            if referenceValue.kind_of?(Array)
              aux = referenceValue.map{|t| self.extract_id(t)}
            else
              aux = self.extract_id(referenceValue)
            end
            referenceValue = aux if !aux.nil?
          end
          referenceValue = [referenceValue] if !referenceValue.kind_of?(Array)
          byTerm[term] = referenceValue + (values - referenceValue)
        end
      end
    end

    # Store
    @dicts[store_tag] = {byTerm: byTerm, byValue: byValue}
  end
end

#calc_profiles_dictionaryObject

Calculate profiles dictionary with Key= Term; Value = Profiles



1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
# File 'lib/semtools/ontology.rb', line 1731

# Calculates the profiles dictionary (Key = Term; Value = profile IDs containing it)
# and stores it in @profilesDict. Only warns when no profiles are stored.
def calc_profiles_dictionary
  return warn('Profiles are not already loaded. Aborting dictionary calc') if @profiles.empty?

  # The opposite direction (profile => terms) is @profiles itself
  @profilesDict = @profiles.each_with_object({}) do |(profile_id, profile_terms), by_term|
    profile_terms.each { |term| (by_term[term] ||= []) << profile_id }
  end
end

#calc_term_levels(calc_paths: false, shortest_path: true) ⇒ Object

Calculates ontology structural levels for all ontology terms

Parameters
calc_paths

calculates term paths if it’s not already calculated

shortest_path

if true, level is calculated with shortest path, largest path will be used in other cases



1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
# File 'lib/semtools/ontology.rb', line 1581

# Calculates the structural level of every term found in @term_paths, stores the
# result in @dicts[:level] and the deepest level in @max_freqs[:max_depth].
# ===== Parameters
# +calc_paths+:: if true, term paths are calculated when @term_paths is empty; otherwise a warning is emitted
# +shortest_path+:: if true, the level is the :shortest_path of the term, otherwise its :largest_path.
#                   Terms without that value get level -1
def calc_term_levels(calc_paths: false, shortest_path: true)
  if @term_paths.empty?
    if calc_paths
      self.calc_term_paths
    else
      warn('Term paths are not already loaded. Aborting dictionary calc') 
    end
  end
  return nil if @term_paths.empty?

  path_kind = shortest_path ? :shortest_path : :largest_path
  level_by_term = {}
  terms_by_level = {}
  @term_paths.each do |term, info|
    depth = info[path_kind]
    depth = depth.nil? ? -1 : depth.round(0)
    level_by_term[term] = depth
    (terms_by_level[depth] ||= []) << term
  end
  # Note: in this case, value has multiplicity and term is unique value
  @dicts[:level] = {byTerm: terms_by_level, byValue: level_by_term}
  # Update maximum depth
  @max_freqs[:max_depth] = terms_by_level.keys.max
end

#calc_term_pathsObject

Find paths of a term following it ancestors and stores all possible paths for it and it’s parentals. Also calculates paths metadata and stores into @term_paths



1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
# File 'lib/semtools/ontology.rb', line 1627

# Finds, for every term of @stanzas[:terms], the paths that follow its direct parentals
# (taken from the :is_a dictionary) and stores them, together with path metadata
# (:total_paths, :largest_path, :shortest_path), in @term_paths.
# Paths are only calculated when @structureType is :hierarchical or :sparse; in any other
# case a warning is emitted and @term_paths is left empty.
def calc_term_paths
  self.calc_ancestors_dictionary if !@dicts.keys.include?(:is_a) # Calculate direct parentals dictionary if it's not already calculated
  visited_terms = []
  @term_paths = {}
  if [:hierarchical, :sparse].include? @structureType
    terms = @stanzas[:terms].keys
    terms.each do |term|
      if self.is_obsolete?(term) || self.is_alternative?(term)  # Special case (obsoletes)
        special_term = term
        # From here on, term refers to the ID found in the obsoletes/alternatives index
        term = self.is_obsolete?(term) ? @obsoletes_index[term] : @alternatives_index[term]
        @term_paths[term] = {total_paths: 0, largest_path: 0, shortest_path: 0, paths: []} if !@term_paths.include?(term)
        # The special term shares the same info hash as the term it points to
        @term_paths[special_term] = @term_paths[term]
        visited_terms << special_term
      end

      if !visited_terms.include?(term)
        @term_paths[term] = {total_paths: 0, largest_path: 0, shortest_path: 0, paths: []} if !@term_paths.include?(term)
        parentals = @dicts[:is_a][:byTerm][term]
        if parentals.nil?
          # No direct parentals: the only path is the term itself
          @term_paths[term][:paths] << [term]
        else
          parentals.each do |direct_parental|
            if visited_terms.include? direct_parental # Use direct_parental already calculated paths
              new_paths = @term_paths[direct_parental][:paths].map{|path| [term, path].flatten}
            else # Calculate new paths
              # NOTE(review): expand_path is presumably expected to fill @term_paths[direct_parental]
              # and to register it in visited_terms -- verify against its definition
              self.expand_path(direct_parental, visited_terms)
              new_paths = @term_paths[direct_parental][:paths].map{|path| [term, path].flatten}
            end
            new_paths.each{|path| @term_paths[term][:paths] << path}
          end
        end
        visited_terms << term
      end
      # Update metadata
      @term_paths[term][:total_paths] = @term_paths[term][:paths].length
      paths_sizes = @term_paths[term][:paths].map{|path| path.length}
      # max/min are nil when no paths are stored for the term
      @term_paths[term][:largest_path] = paths_sizes.max
      @term_paths[term][:shortest_path] = paths_sizes.min
    end
  else
    warn('Ontology structure must be hierarchical or sparse to calculate term levels. Aborting paths calculation')
  end
end

#check_ids(ids, substitute: true) ⇒ Object

Check a pool of IDs and return the allowed IDs, removing those which are not official terms of this ontology

Parameters
ids

to be checked

Return

two arrays with allowed and rejected IDs respectively



1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
# File 'lib/semtools/ontology.rb', line 1243

# Split a pool of IDs into those stored as terms in this ontology and those that are not.
# ===== Parameters
# +ids+:: IDs to be checked
# +substitute+:: if true, each accepted ID is translated through get_main_id before being returned
# ===== Returns
# two arrays: accepted IDs (optionally translated) and rejected IDs, both in input order
def check_ids(ids, substitute: true)
  accepted, rejected = ids.partition { |code| @stanzas[:terms].include?(code) }
  accepted = accepted.map { |code| self.get_main_id(code) } if substitute
  return accepted, rejected
end

#clean_profile(profile, remove_alternatives: true) ⇒ Object

Remove alternatives (if official term is present) and ancestors terms of a given profile

Parameters
profile

profile to be cleaned

remove_alternatives

if true, alternative IDs are also removed from the profile (after ancestors have been removed)

Returns

cleaned profile



1516
1517
1518
1519
1520
1521
1522
1523
1524
# File 'lib/semtools/ontology.rb', line 1516

# Clean a profile: drop ancestor terms and, optionally, alternative IDs.
# ===== Parameters
# +profile+:: profile to be cleaned
# +remove_alternatives+:: if true, the result is also passed through remove_alternatives_from_profile
# ===== Returns
# the cleaned profile (first element returned by the removal helpers)
def clean_profile(profile, remove_alternatives: true)
  cleaned, _ = self.remove_ancestors_from_profile(profile)
  cleaned, _ = self.remove_alternatives_from_profile(cleaned) if remove_alternatives
  return cleaned
end

#clean_profiles(store: false, remove_alternatives: true) ⇒ Object

Remove alternatives (if official term is present) and ancestors terms of stored profiles

Parameters
store

if true, cleaned profiles will replace the already stored profiles

remove_alternatives

if true, alternative IDs are also removed from each profile (passed through to clean_profile)

Returns

a hash with cleaned profiles



1533
1534
1535
1536
1537
1538
# File 'lib/semtools/ontology.rb', line 1533

# Apply clean_profile to every stored profile.
# ===== Parameters
# +store+:: if true, the cleaned profiles replace @profiles
# +remove_alternatives+:: forwarded to clean_profile
# ===== Returns
# a hash (profile ID => cleaned terms)
def clean_profiles(store: false, remove_alternatives: true)
  cleaned = @profiles.map do |profile_id, profile_terms|
    [profile_id, self.clean_profile(profile_terms, remove_alternatives: remove_alternatives)]
  end.to_h
  @profiles = cleaned if store
  cleaned
end

#cloneObject



2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
# File 'lib/semtools/ontology.rb', line 2011

# Build a new Ontology whose attributes are copies of this object's attributes.
# Each attribute is copied with a single +clone+ call, so only the top-level
# container is duplicated (presumably nested hashes/arrays stay shared - confirm
# if a deep copy is needed).
# ===== Returns
# the new Ontology object
def clone
  copy = Ontology.new
  copy.header = self.header.clone
  # Stanzas are copied section by section into the hash created by Ontology.new
  [:terms, :typedefs, :instances].each do |section|
    copy.stanzas[section] = self.stanzas[section].clone
  end
  # Remaining attributes, in the same order they were originally assigned
  [
    :ancestors_index, :descendants_index, :alternatives_index, :obsoletes_index,
    :structureType, :ics, :meta, :dicts, :profiles, :profilesDict, :items,
    :removable_terms, :term_paths, :max_freqs
  ].each do |attribute|
    copy.send("#{attribute}=", self.send(attribute).clone)
  end
  return copy
end

#compare(termsA, termsB, sim_type: :resnik, ic_type: :resnik, bidirectional: true) ⇒ Object

Compare two term sets

Parameters
termsA

set to be compared

termsB

set to be compared

sim_type

similitude method to be used. Default: resnik

ic_type

ic type to be used. Default: resnik

bidirectional

calculate bidirectional similitude. Default: true

Return

similitude calculated

Raises:

  • (ArgumentError)


415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
# File 'lib/semtools/ontology.rb', line 415

# Compare two term sets using best-match averaging.
# For each term of A the best similarity against all terms of B is taken (0 when no
# valid similarity exists) and the mean of those best values is returned. In
# bidirectional mode the B -> A direction is computed too and both directions are
# pooled, weighted by set size.
# ===== Parameters
# +termsA+:: set to be compared
# +termsB+:: set to be compared
# +sim_type+:: similitude method to be used. Default: resnik
# +ic_type+:: ic type to be used. Default: resnik
# +bidirectional+:: calculate bidirectional similitude. Default: true
# ===== Returns
# similitude calculated
# ===== Raises
# ArgumentError if any set is nil or empty
def compare(termsA, termsB, sim_type: :resnik, ic_type: :resnik, bidirectional: true)
  raise ArgumentError, "Terms sets given are NIL" if termsA.nil? || termsB.nil?
  # Both emptiness checks are evaluated before deciding, as in a non short-circuit OR
  raise ArgumentError, "Set given is empty. Aborting similarity calc" if [termsA, termsB].map(&:empty?).any?
  # A -> B: best score per term of A
  best_scores = termsA.map do |term_a|
    scores = termsB.map { |term_b| self.get_similarity(term_a, term_b, type: sim_type, ic_type: ic_type) }
    # false/nil mark similarities that could not be calculated
    valid_scores = scores.reject { |score| score == false || score == nil }
    valid_scores.empty? ? 0 : valid_scores.max
  end
  mean_a = best_scores.inject(:+).to_f / best_scores.size
  return mean_a unless bidirectional
  # B -> A, then pool both directions weighting by number of terms
  pooled_a = mean_a * best_scores.size
  pooled_b = self.compare(termsB, termsA, sim_type: sim_type, ic_type: ic_type, bidirectional: false) * termsB.size
  return (pooled_a + pooled_b) / (termsA.size + termsB.size)
end

#compare_profiles(external_profiles: nil, sim_type: :resnik, ic_type: :resnik, bidirectional: true) ⇒ Object

Compare internal stored profiles against another set of profiles. If an external set is not provided, internal profiles will be compared with itself

Parameters
external_profiles

set of external profiles. If nil, internal profiles will be compared with itself

sim_type

similitude method to be used. Default: resnik

ic_type

ic type to be used. Default: resnik

bidirectional

calculate bidirectional similitude. Default: true

Return

Similitudes calculated



449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
# File 'lib/semtools/ontology.rb', line 449

# Compare stored profiles against another set of profiles (or against themselves).
# ===== Parameters
# +external_profiles+:: hash of external profiles. If nil, stored profiles are compared with themselves
# +sim_type+:: similitude method to be used. Default: resnik
# +ic_type+:: ic type to be used. Default: resnik
# +bidirectional+:: forwarded to compare. Default: true
# ===== Returns
# nested hash: stored profile ID => {compared profile ID => similarity}
def compare_profiles(external_profiles: nil, sim_type: :resnik, ic_type: :resnik, bidirectional: true)
  similarities = {}
  pending_ids = @profiles.keys
  if external_profiles.nil?
    # NOTE(review): the SAME array is used as work queue and as comparison list, so
    # every shift below also removes that ID from the comparison list. Each pair is
    # thus computed once (no self comparison, no mirrored pair) - confirm this is intended.
    target_ids = pending_ids
    target_profiles = @profiles
  else
    target_ids = external_profiles.keys
    target_profiles = external_profiles
  end
  until pending_ids.empty?
    curr_id = pending_ids.shift
    curr_profile = @profiles[curr_id]
    target_ids.each do |other_id|
      score = compare(curr_profile, target_profiles[other_id], sim_type: sim_type, ic_type: ic_type, bidirectional: bidirectional)
      (similarities[curr_id] ||= {})[other_id] = score
    end
  end
  return similarities
end

#compute_relations_to_items(external_item_list, mode, thresold) ⇒ Object

NO IDEA WHAT THIS DOES. DON’T USE THIS METHODS IS NOT CHECKED

Parameters
++
Returns



1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
# File 'lib/semtools/ontology.rb', line 1929

# UNCHECKED METHOD (flagged as such by its author). Walks the ontology levels that
# contain terms with items, from the highest level number to the lowest, and for
# every term collects a [term, pval] pair.
# ===== Parameters
# +external_item_list+:: items to be tested against each term's associated items
# +mode+:: only :elim triggers the Fisher test; any other value leaves pval as nil
# +thresold+:: p-value limit under which a term's items are penalized in its parents
# ===== Returns
# array of [term, pval] pairs
# NOTE(review): relies on @items_relations, get_term_level, get_fisher_exact_test and
# get_parents, none of which are visible here - verify they exist before using this.
def compute_relations_to_items(external_item_list, mode, thresold)
  results = []
  # parent term => items already explained by a significant child term
  penalized_terms = {}
  # terms_levels = get_terms_levels(@items_relations.keys)
  terms_with_items_levels = @items_relations.keys.map{|term| self.get_term_level(term)}.uniq
  terms_levels = self.get_ontology_levels().select{|k,v| terms_with_items_levels.include?(k)}
  # NOTE(review): Hash#each discards the block value and returns the receiver, so the
  # next line does not filter anything; map + to_h was probably intended - confirm.
  terms_levels = terms_levels.each{|level,terms| [level, terms.select{|t| @items_relations.keys.include?(t)}] } # Use only items terms. MAYBE IT'S NOT OUR TARGET (line added by fmj)
  levels = terms_levels.keys.sort
  levels.reverse_each do |level|
    terms_levels[level].each do |term|
      associated_items = @items_relations[term]
      if mode == :elim 
        items_to_remove = penalized_terms[term]
        items_to_remove = [] if items_to_remove.nil?
        pval = get_fisher_exact_test(
          external_item_list - items_to_remove, 
          associated_items - items_to_remove, 
          ((associated_items | external_item_list) - items_to_remove).length
          )
        if pval <= thresold
          parents = get_parents(term) # Save the items for each parent term to remove them later in the fisher test
          parents.each do |prnt|
            query = penalized_terms[prnt]
            if query.nil?
              penalized_terms[prnt] = @items_relations[term].clone # We need a new array to store the following iterations
            else
              query.concat(@items_relations[term])
            end
          end
        end
      end
      # pval is nil here whenever mode is not :elim
      results << [term, pval]
    end
  end
  return results
end

#exists?(id) ⇒ Boolean

Check if a given ID is stored as term into this object

Parameters
id

to be checked

Return

True if term is allowed or false in other cases

Returns:

  • (Boolean)


999
1000
1001
# File 'lib/semtools/ontology.rb', line 999

# Check if a given ID is stored as a term in this object.
# ===== Parameters
# +id+:: to be checked
# ===== Returns
# true if the ID is a key of the :terms stanza, false otherwise
def exists?(id)
  known_terms = stanzas[:terms]
  known_terms.include?(id)
end

#expand_items_to_parentals(ontology: nil, minimum_childs: 2, clean_profiles: true) ⇒ Object

This method computes the similarity between child terms and imputes their items to their parental terms. To do that, item keys must be terms allowed in this ontology. Similarity is calculated by exact match of items unless an ontology object is provided; in that case, MICAs are used

Parameters
ontology

(Optional) ontology object which items given belongs

minimum_childs

minimum of childs needed to infer relations to parental. Default: 2

clean_profiles

if true, the clean_profile method of the provided ontology is applied to the inferred profiles. Only used if an ontology object is provided

Returns

void and update items object



1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
# File 'lib/semtools/ontology.rb', line 1834

# Infer items for parental terms from the items of their descendants and store
# them in @items. Terms are processed level by level, from deeper levels to roots.
# ===== Parameters
# +ontology+:: (Optional) ontology object the items belong to. If given, items are matched through get_MICA instead of exact equality
# +minimum_childs+:: minimum of childs needed to infer relations to parental. Default: 2
# +clean_profiles+:: if true (and an ontology is given), inferred item sets are passed through ontology.clean_profile
# ===== Returns
# nil (after a warning) when there are no items or no item key is a known term.
# Otherwise the value of the final levels.map, which is not meant to be used; @items is updated in place
def expand_items_to_parentals(ontology: nil, minimum_childs: 2, clean_profiles: true)
  # Check item keys
  if @items.empty?
    warn('Items have been not provided yet')
    return nil
  end
  targetKeys = @items.keys.select{|k| self.exists?(k)}
  if targetKeys.length == 0
    warn('Any item key is allowed')
    return nil
  elsif targetKeys.length < @items.keys.length
    warn('Some item keys are not allowed')
  end

  # Expand to parentals
  # (ancestors are requested with filter_alternatives = true)
  targetKeys << targetKeys.map{|t| self.get_ancestors(t, true)}
  targetKeys.flatten!
  targetKeys.uniq!

  # Obtain levels (go from leaves to roots)
  levels = targetKeys.map{|term| self.get_term_level(term)}
  levels.compact!
  levels.uniq!
  levels.sort!
  levels.reverse!
  levels.shift # Leaves are not expandable

  # Expand from leaves to roots
  levels.map do |lvl|
    curr_keys = targetKeys.select{|k| self.get_term_level(k) == lvl}
    curr_keys.map do |term_expand|
      # to_infer is an Array in the minimum_childs == 1 case and a Hash (item => score) otherwise
      to_infer = []
      # Obtain childs
      # (only descendants that already have items; earlier iterations may have added some)
      childs = self.get_descendants(term_expand,true).select{|t| @items.keys.include?(t)}
      # Expand
      if childs.length > 0 && minimum_childs == 1 # Special case
        to_infer = childs.map{|c| @items[c]}.flatten.compact.uniq
      elsif childs.length >= minimum_childs
        to_infer = Hash.new(0)
        # Compare
        # Every unordered pair of childs is compared once; childs is consumed by shift
        while childs.length > 1
          curr_term = childs.shift
          childs.each do |compare_term|
            pivot_items = @items[curr_term]
            compare_items = @items[compare_term]
            if ontology.nil? # Exact match
              pivot_items.map do |pitem|
                if compare_items.include?(pitem)
                  # An item shared by a pair scores 2 (one per child of the pair)
                  to_infer[pitem] += 2
                end
              end
            else # Find MICAs
              # NOTE(review): micas[0] is nil if either item list is empty, which would
              # make maxmica.last raise - confirm item lists are never empty here.
              local_infer = Hash.new(0)
              pivot_items.map do |pitem|
                micas = compare_items.map{|citem| ontology.get_MICA(pitem, citem)}
                maxmica = micas[0]
                micas.each{|mica| maxmica = mica if mica.last > maxmica.last}
                local_infer[maxmica.first] += 1
              end
              compare_items.map do |citem|
                micas = pivot_items.map{|pitem| ontology.get_MICA(pitem, citem)}
                maxmica = micas[0]
                micas.each{|mica| maxmica = mica if mica.last > maxmica.last}
                local_infer[maxmica.first] += 1
              end
              # Keep only MICAs counted at least twice for this pair
              local_infer.each{|t,freq| to_infer[t] += freq if freq >= 2}
            end
          end
        end
        # Filter infer
        to_infer = to_infer.select{|k,v| v >= minimum_childs}
      end
      # Infer
      if to_infer.length > 0
        @items[term_expand] = [] if @items[term_expand].nil?
        if to_infer.kind_of?(Array)
          @items[term_expand] = (@items[term_expand] + to_infer).uniq
        else
          @items[term_expand] = (@items[term_expand] + to_infer.keys).uniq
        end
        @items[term_expand] = ontology.clean_profile(@items[term_expand]) if clean_profiles && !ontology.nil?
      elsif !@items.include?(term_expand)
        # Nothing inferred and no own items: drop the term from the working key list
        targetKeys.delete(term_expand)
      end
    end
  end
end

#expand_path(curr_term, visited_terms) ⇒ Object

Recursive function which finds the paths of a term following its ancestors and stores all possible paths for it and its parentals

Parameters
curr_term

current visited term

visited_terms

already expanded terms



1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
# File 'lib/semtools/ontology.rb', line 1676

# Recursively collect every is_a path that starts at a term, storing the paths of
# the term and of each not yet visited parental in @term_paths.
# Only the :paths list is filled here; the counters of the entry are left untouched.
# ===== Parameters
# +curr_term+:: current visited term
# +visited_terms+:: already expanded terms (the term is appended once expanded)
def expand_path(curr_term, visited_terms)
  return if visited_terms.include?(curr_term) # Already expanded
  @term_paths[curr_term] ||= {total_paths: 0, largest_path: 0, shortest_path: 0, paths: []}
  parents = @dicts[:is_a][:byTerm][curr_term]
  if parents.nil?
    # Root term: its only path is itself (end of recurrence)
    @term_paths[curr_term][:paths] << [curr_term]
  else
    parents.each do |parent|
      self.expand_path(parent, visited_terms) unless visited_terms.include?(parent)
      # Prefix every path of the parent with the current term
      inherited = @term_paths[parent][:paths].map { |path| [curr_term, path].flatten }
      @term_paths[curr_term][:paths].concat(inherited)
    end
  end
  visited_terms << curr_term
end

#extract_id(text, splitBy: ' ') ⇒ Object

This method assumes that a text given contains an allowed ID. And will try to obtain it splitting it

Parameters
text

to be checked

Return

The correct ID if it can be found or nil in other cases



1009
1010
1011
1012
1013
1014
1015
1016
# File 'lib/semtools/ontology.rb', line 1009

# Try to obtain an allowed ID from a text. The text itself is checked first; if it
# is not an ID, the first chunk obtained splitting the text is checked.
# ===== Parameters
# +text+:: to be checked
# +splitBy+:: separator used to split the text. Default: ' '
# ===== Returns
# The correct ID if it can be found or nil in other cases (including nil, empty or
# blank texts, which produce no chunk at all)
def extract_id(text, splitBy: ' ')
  return text if self.exists?(text)
  first_chunk = text.to_s.split(splitBy).first
  # split returns [] for empty/blank input, so there may be nothing to symbolize
  return nil if first_chunk.nil?
  candidate = first_chunk.to_sym
  return self.exists?(candidate) ? candidate : nil
end

#get_ancestors(term, filter_alternatives = false) ⇒ Object

Find ancestors of a given term

Parameters
term

to be checked

filter_alternatives

if true, remove alternatives from final results

Returns

an array with all ancestors of the given term, or an empty array if no ancestors are indexed for it



658
659
660
# File 'lib/semtools/ontology.rb', line 658

# Find ancestors of a given term (delegates to get_familiar).
# ===== Parameters
# +term+:: to be checked
# +filter_alternatives+:: if true, remove alternatives from final results
# ===== Returns
# whatever get_familiar returns when asked for ancestors
def get_ancestors(term, filter_alternatives = false)
  want_ancestors = true
  self.get_familiar(term, want_ancestors, filter_alternatives)
end

#get_childs_table(terms, filter_alternatives = false) ⇒ Object

Gets metainfo table from a set of terms

Parameters
terms

IDs to be expanded

filter_alternatives

flag to be used in get_descendants method

Returns

an array with one entry per term, shaped as [[TermID, TermName], [[DescendantID, DescendantName], ...]]



1775
1776
1777
1778
1779
1780
1781
# File 'lib/semtools/ontology.rb', line 1775

# Build a table with each term and its descendants, every ID paired with its translate_id value.
# ===== Parameters
# +terms+:: IDs to be expanded
# +filter_alternatives+:: flag to be used in get_descendants method
# ===== Returns
# an array with one entry per term: [[term, translation], [[descendant, translation], ...]]
def get_childs_table(terms, filter_alternatives = false)
  terms.map do |term|
    term_cell = [term, self.translate_id(term)]
    child_cells = self.get_descendants(term, filter_alternatives).map { |child| [child, self.translate_id(child)] }
    [term_cell, child_cells]
  end
end

#get_descendants(term, filter_alternatives = false) ⇒ Object

Find descendants of a given term

Parameters
term

to be checked

filter_alternatives

if true, remove alternatives from final results

Returns

an array with all descendants of the given term, or an empty array if no descendants are indexed for it



669
670
671
# File 'lib/semtools/ontology.rb', line 669

# Find descendants of a given term (delegates to get_familiar).
# ===== Parameters
# +term+:: to be checked
# +filter_alternatives+:: if true, remove alternatives from final results
# ===== Returns
# whatever get_familiar returns when asked for descendants
def get_descendants(term, filter_alternatives = false)
  want_ancestors = false
  self.get_familiar(term, want_ancestors, filter_alternatives)
end

#get_familiar(term, return_ancestors = true, filter_alternatives = false) ⇒ Object

Find ancestors/descendants of a given term

Parameters
term

to be checked

return_ancestors

return ancestors if true or descendants if false

filter_alternatives

if true, remove alternatives from final results

Returns

an array with all ancestors/descendants of the given term, or an empty array if none are indexed for it



681
682
683
684
685
686
687
688
689
690
691
692
693
# File 'lib/semtools/ontology.rb', line 681

# Find ancestors/descendants of a given term in the corresponding index.
# ===== Parameters
# +term+:: to be checked
# +return_ancestors+:: return ancestors if true or descendants if false
# +filter_alternatives+:: if true, IDs present in @alternatives_index are removed from the result
# ===== Returns
# a copy of the indexed array (the index itself is never modified), or an empty
# array if the term has no entry in the index
def get_familiar(term, return_ancestors = true, filter_alternatives = false)
  index = return_ancestors ? @ancestors_index : @descendants_index
  stored = index[term]
  return [] if stored.nil?
  relatives = stored.clone
  relatives.delete_if { |relative| @alternatives_index.include?(relative) } if filter_alternatives
  relatives
end

#get_frequency(term, type: :struct_freq) ⇒ Object

Get a term frequency

Parameters
term

term to be checked

type

type of frequency to be returned. Allowed: [:struct_freq, :observed_freq]

Returns

frequency of term given or nil if term is not allowed



1405
1406
1407
1408
# File 'lib/semtools/ontology.rb', line 1405

# Get a frequency stored in @meta for a term.
# ===== Parameters
# +term+:: term to be checked
# +type+:: key of the frequency to be returned. Default: :struct_freq
# ===== Returns
# the stored value, or nil if the term has no entry in @meta
def get_frequency(term, type: :struct_freq)
  term_meta = @meta[term]
  return nil if term_meta.nil?
  term_meta[type]
end

#get_IC(termRaw, type: :resnik, force: false, zhou_k: 0.5) ⇒ Object

Obtain IC of an specific term

Parameters
term

which IC will be calculated

type

of IC to be calculated. Default: resnik

force

force re-calculate the IC. Do not check if it is already calculated

zhou_k

special coeficient for Zhou IC method

Returns

the IC calculated

Raises:

  • (ArgumentError)


704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
# File 'lib/semtools/ontology.rb', line 704

# Obtain the Information Content (IC) of a specific term, caching it in @ics.
# ===== Parameters
# +termRaw+:: term whose IC will be calculated (converted with to_sym)
# +type+:: IC to be calculated, one of @@allowed_calcs[:ics]. Default: resnik
# +force+:: force re-calculate the IC. Do not check if it is already calculated
# +zhou_k+:: special coefficient for Zhou IC method
# ===== Returns
# the IC calculated
# ===== Raises
# ArgumentError if the IC type is not allowed
def get_IC(termRaw, type: :resnik, force: false, zhou_k: 0.5)
  term = termRaw.to_sym
  # Check
  raise ArgumentError, "IC type specified (#{type}) is not allowed" if !@@allowed_calcs[:ics].include?(type)
  # Check if it's already calculated
  return @ics[type][term] if (@ics[type].include? term) && !force
  # Calculate
  ic = - 1
  case type # https://arxiv.org/ftp/arxiv/papers/1310/1310.8059.pdf  |||  https://sci-hub.st/https://doi.org/10.1016/j.eswa.2012.01.082
    ###########################################
    #### STRUCTURE BASED METRICS (not implemented)
    ###########################################
    # Shortest path, Weighted Link, Hirst and St-Onge Measure, Wu and Palmer,
    # Slimani, Li, Leacock and Chodorow
    ###########################################
    #### INFORMATION CONTENT METRICS
    ###########################################
    when :resnik # Resnik P: Using Information Content to Evaluate Semantic Similarity in a Taxonomy
      # -log(Freq(x) / Max_Freq)
      ic = -Math.log10(@meta[term][:struct_freq].fdiv(@max_freqs[:struct_freq]))
    when :resnik_observed
      # -log(Freq(x) / Max_Freq)
      ic = -Math.log10(@meta[term][:observed_freq].fdiv(@max_freqs[:observed_freq]))
    # Lin, Jiang & Conrath (not implemented)

    ###########################################
    #### FEATURE-BASED METRICS (not implemented)
    ###########################################
    # Tversky, x-similarity, Rodirguez

    ###########################################
    #### HYBRID METRICS
    ###########################################
    when :seco, :zhou # SECO:: An intrinsic information content metric for semantic similarity in WordNet
      #  1 - ( log(hypo(x) + 1) / log(max_nodes) )
      ic = 1 - Math.log10(@meta[term][:struct_freq]).fdiv(Math.log10(@stanzas[:terms].length - @alternatives_index.length))
      # Only Zhou refines the Seco value. The condition must test the requested type:
      # a bare symbol is always truthy and would turn every Seco request into Zhou.
      if type == :zhou # New Model of Semantic Similarity Measuring in Wordnet
        # k*(IC_Seco(x)) + (1-k)*(log(depth(x))/log(max_depth))
        @ics[:seco][term] = ic # Special store
        ic = zhou_k * ic + (1.0 - zhou_k) * (Math.log10(@meta[term][:descendants]).fdiv(Math.log10(@max_freqs[:max_depth])))
      end
    when :sanchez # Semantic similarity estimation in the biomedical domain: An ontology-basedinformation-theoretic perspective
      ic = -Math.log10((@meta[term][:descendants].fdiv(@meta[term][:ancestors]) + 1.0).fdiv(@max_freqs[:max_depth] + 1.0))
    # Knappe (not implemented)
  end
  @ics[type][term] = ic
  return ic
end

#get_ICMICA(termA, termB, ic_type = :resnik) ⇒ Object

Find the IC of the Most Informative Common Ancestor (MICA) of two given terms

Parameters
termA

term to be cheked

termB

term to be checked

ic_type

IC formula to be used

Returns

the IC of the MICA(termA,termB)



789
790
791
792
# File 'lib/semtools/ontology.rb', line 789

# Find the IC of the MICA of two given terms.
# ===== Parameters
# +termA+:: term to be checked
# +termB+:: term to be checked
# +ic_type+:: IC formula to be used
# ===== Returns
# the last element of the get_MICA result, or nil when get_MICA found no term (first element nil)
def get_ICMICA(termA, termB, ic_type = :resnik)
  mica = self.get_MICA(termA, termB, ic_type)
  return nil if mica.first.nil?
  mica.last
end

#get_index_alternatives(alt_tag: ) ⇒ Object

Expand alternative IDs arround all already stored terms

Parameters
alt_tag

tag used to expand alternative IDs

Returns

true if process ends without errors and false in other cases



487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
# File 'lib/semtools/ontology.rb', line 487

# Register the alternative IDs declared by the stored terms.
# For every alternative ID (excluding @removable_terms): it is mapped to its main ID
# in @alternatives_index, it inherits the main ID's entry of @ancestors_index (if any)
# and, if it is not a stored term yet, it is added to the :terms stanza sharing the
# tags of its main ID.
# ===== Parameters
# +alt_tag+:: tag used to expand alternative IDs
# ===== Returns
# the updated :terms stanza hash
# ===== Raises
# RuntimeError if there are no stored terms
def get_index_alternatives(alt_tag: @@basic_tags[:alternative][0])
  raise('stanzas terms empty')  if @stanzas[:terms].empty?
  # New stanzas are collected apart because the terms hash is being iterated
  stanzas_to_add = {}
  @stanzas[:terms].each do |main_id, tags|
    declared_alts = tags[alt_tag]
    next if declared_alts.nil?
    (declared_alts - @removable_terms).each do |alt_term|
      @alternatives_index[alt_term] = main_id
      stanzas_to_add[alt_term] = @stanzas[:terms][main_id] unless @stanzas[:terms].include?(alt_term)
      @ancestors_index[alt_term] = @ancestors_index[main_id] unless @ancestors_index[main_id].nil?
    end
  end
  @stanzas[:terms].merge!(stanzas_to_add)
end

#get_index_child_parent_relations(tag: ) ⇒ Object

Expand parentals set and link all info to their alternative IDs. Also launch frequencies process

Parameters
tag

tag used to expand parentals

split_info_char

special regex used to split info (if it is necessary)

split_info_indx

special index to take splitted info (if it is necessary)

Returns

true if process ends without errors and false in other cases



608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
# File 'lib/semtools/ontology.rb', line 608

# Build the ancestors and descendants indexes and store the ontology structure type.
# Parentals are obtained from the class method get_related_ids_by_tag; removable
# terms are dropped and alternative IDs share the index entries of their main ID.
# ===== Parameters
# +tag+:: tag used to expand parentals
# ===== Returns
# the structure type stored, or nil (after a warning) if the :terms stanza is nil
# ===== Raises
# RuntimeError if parentals could not be expanded
def get_index_child_parent_relations(tag: @@basic_tags[:ancestors][0])
  return warn('stanzas terms empty') if @stanzas[:terms].nil?
  # Expand
  struct_type, parentals = self.class.get_related_ids_by_tag(terms: @stanzas[:terms],
                            target_tag: tag,
                            alt_ids: @alternatives_index,
                            obsoletes: @obsoletes_index.length)
  raise('Error expanding parentals')  if struct_type.nil? || parentals.nil?
  # Ancestors per term and, inverted, descendants per term
  ancestors = {}
  descendants = {}
  parentals.each do |term, parents|
    kept_parents = parents - @removable_terms
    ancestors[term] = kept_parents
    kept_parents.each { |parent| (descendants[parent] ||= []) << term }
  end
  # Alternative IDs point to the same arrays as their main ID
  @alternatives_index.each do |alt_id, main_id|
    ancestors[alt_id] = ancestors[main_id] if ancestors.include?(main_id)
    descendants[alt_id] = descendants[main_id] if descendants.include?(main_id)
  end
  # Any structure type other than atomic, sparse or circular is stored as hierarchical
  struct_type = :hierarchical unless [:atomic, :sparse, :circular].include?(struct_type)
  # Store
  @ancestors_index = ancestors
  @descendants_index = descendants
  @structureType = struct_type
end

#get_index_frequenciesObject

Calculates regular frequencies based on ontology structure (using parentals)

Returns

true if everything end without errors and false in other cases



533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
# File 'lib/semtools/ontology.rb', line 533

# Calculate structural metadata for every stored term using the ancestors and
# descendants indexes, and update the maximum values in @max_freqs.
# Official terms get their number of ancestors/descendants (alternative IDs are not
# counted) and struct_freq = descendants + 1. Alternative IDs share the very same
# metadata hash as their main ID and do not increase structural frequencies.
# ===== Returns
# nil (after a warning) if the ancestors index is empty; otherwise the :terms stanza hash
def get_index_frequencies()
  # Check
  if @ancestors_index.empty?
    warn('ancestors_index object is empty')
  else
    new_meta = -> { {ancestors: 0.0, descendants: 0.0, struct_freq: 0.0, observed_freq: 0.0} }
    # Per each term, add frequencies
    @stanzas[:terms].each do |id, _tags|
      if @alternatives_index.include?(id)
        main_id = @alternatives_index[id]
        @meta[main_id] = new_meta.call if @meta[main_id].nil?
        @meta[id] = @meta[main_id] # Same object: later updates of the main ID are seen here too
      else # Official term
        @meta[id] = new_meta.call if @meta[id].nil?
        term_meta = @meta[id]
        # Membership is checked against the index hash itself (constant time) instead
        # of scanning an array of its keys for every ancestor/descendant
        term_meta[:ancestors] = @ancestors_index.include?(id) ? @ancestors_index[id].count { |anc| !@alternatives_index.include?(anc) }.to_f : 0.0
        term_meta[:descendants] = @descendants_index.include?(id) ? @descendants_index[id].count { |desc| !@alternatives_index.include?(desc) }.to_f : 0.0
        term_meta[:struct_freq] = term_meta[:descendants] + 1.0
        # Update maximums
        @max_freqs[:struct_freq] = term_meta[:struct_freq] if @max_freqs[:struct_freq] < term_meta[:struct_freq]
        @max_freqs[:max_depth] = term_meta[:descendants] if @max_freqs[:max_depth] < term_meta[:descendants]
      end
    end
  end
end

#get_index_obsoletes(obs_tag: , alt_tags: ) ⇒ Object

Expand obsoletes set and link info to their alternative IDs

Parameters
obs_tags

tags to be used to find obsoletes

alt_tags

tags to find alternative IDs (if are available)

reset_obsoletes

flag to indicate if obsoletes set must be reset. Default: true

Returns

true if process ends without errors and false in other cases



577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
# File 'lib/semtools/ontology.rb', line 577

# Register obsolete terms that declare a replacement.
# A term is handled when its obsolete tag equals the string 'true', it is not already
# in @obsoletes_index and at least one of the alternative tags is present. The first
# ID of the first present tag is stored as its replacement in both @alternatives_index
# and @obsoletes_index.
# ===== Parameters
# +obs_tag+:: tag used to find obsoletes
# +alt_tags+:: tags to find alternative IDs (checked in order)
# ===== Returns
# nil (after a warning) if there are no stored terms; otherwise the :terms stanza hash
def get_index_obsoletes(obs_tag: @@basic_tags[:obsolete], alt_tags: @@basic_tags[:alternative])
  if @stanzas[:terms].empty?
    warn('stanzas terms empty')
  else
    @stanzas[:terms].each do |id, term_tags|
      next if term_tags.nil?
      next unless term_tags[obs_tag] == 'true' # Obsolete tag presence
      next unless @obsoletes_index[id].nil? # Already stored
      # Values of the alternative tags that are present, in tag order
      replacements = alt_tags.map { |alt| term_tags[alt] }.compact
      next if replacements.empty?
      new_id = replacements.first.first # FIRST tag, FIRST id
      @alternatives_index[id] = new_id
      @obsoletes_index[id] = new_id
    end
  end
end

#get_main_id(id) ⇒ Object

Returns

the main ID assigned to a given ID. If the given ID is not an alternative/obsolete ID, the ID itself is returned

Parameters
id

to be translated

Return

main ID related to a given ID. Returns nil if given ID is not an allowed ID



1229
1230
1231
1232
1233
1234
1235
# File 'lib/semtools/ontology.rb', line 1229

# Translate an ID to its main ID.
# ===== Parameters
# +id+:: to be translated
# ===== Returns
# nil if the ID is not a stored term. The ID registered in @alternatives_index when
# there is one and that ID is not itself a key of @obsoletes_index. The given ID otherwise
def get_main_id(id)
  return nil unless @stanzas[:terms].include?(id)
  main_id = @alternatives_index[id]
  return id if main_id.nil? || @obsoletes_index.include?(main_id)
  main_id
end

#get_MICA(termA, termB, ic_type = :resnik) ⇒ Object

Find the Most Informative Common Ancestor (MICA) of two given terms

Parameters
termA

term to be cheked

termB

term to be checked

ic_type

IC formula to be used

Returns

the MICA(termA,termB) and it’s IC



802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
# File 'lib/semtools/ontology.rb', line 802

# Find the shared ancestor with the highest IC (MICA) of two given terms.
# Alternative IDs are translated first, and each term counts as its own ancestor.
# ===== Parameters
# +termA+:: term to be checked
# +termB+:: term to be checked
# +ic_type+:: IC formula to be used
# ===== Returns
# a pair [MICA, IC]; [nil, -1.0] when no shared ancestor with IC above -1.0 is found
def get_MICA(termA, termB, ic_type = :resnik)
  termA = @alternatives_index.fetch(termA, termA)
  termB = @alternatives_index.fetch(termB, termB)
  # Special case: same term, it is its own MICA
  return [termA, self.get_IC(termA, type: ic_type)] if termA.eql?(termB)
  anc_A = self.get_ancestors(termA)
  anc_B = self.get_ancestors(termB)
  not_found = [nil, -1.0]
  return not_found if anc_A.empty? && anc_B.empty?
  # Include the terms themselves, then intersect
  shared_ancestors = (anc_A << termA) & (anc_B << termB)
  # Keep the first ancestor reaching the highest IC
  shared_ancestors.inject(not_found) do |best, anc|
    ic = self.get_IC(anc, type: ic_type)
    ic > best[1] ? [anc, ic] : best
  end
end

#get_observed_frequency(term) ⇒ Object

Gets observed frequency of a term given

Parameters
term

to be checked

Returns

observed frequency of given term or nil if term is not allowed



1426
1427
1428
# File 'lib/semtools/ontology.rb', line 1426

# Get the observed frequency of a given term (delegates to get_frequency).
# ===== Parameters
# +term+:: to be checked
# ===== Returns
# whatever get_frequency returns for type :observed_freq
def get_observed_frequency(term)
  freq_kind = :observed_freq
  self.get_frequency(term, type: freq_kind)
end

#get_observed_ics_by_onto_and_freqObject

Calculates and return resnik ICs (by ontology and observed frequency) for observed terms

Returns

two hashes with resnik and resnik_observed ICs for observed terms



765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
# File 'lib/semtools/ontology.rb', line 765

# Calculate resnik ICs (structural and observed) for every term found in the stored profiles.
# ===== Returns
# two hashes (term => IC) taken from @ics[:resnik] and @ics[:resnik_observed],
# restricted to the observed terms. Both are empty if there are no profiles
def get_observed_ics_by_onto_and_freq
  return [{}, {}] if @profiles.empty?
  observed = @profiles.values.flatten.uniq
  # Fill the IC caches: default type first, observed resnik afterwards
  observed.each { |term| get_IC(term) }
  observed.each { |term| get_IC(term, type: :resnik_observed) }
  by_structure = @ics[:resnik].select { |term, _ic| observed.include?(term) }
  by_observation = @ics[:resnik_observed].select { |term, _ic| observed.include?(term) }
  return by_structure, by_observation
end

#get_ontology_levelsObject

Gets ontology levels calculated

Returns

ontology levels calculated



1697
1698
1699
# File 'lib/semtools/ontology.rb', line 1697

# Get the ontology levels dictionary.
# ===== Returns
# a copy (clone) of @dicts[:level][:byTerm]; in this dictionary the key is the level and the value its terms
def get_ontology_levels
  levels_dict = @dicts[:level][:byTerm]
  levels_dict.clone
end

#get_ontology_levels_from_profiles(uniq = true) ⇒ Object

Return ontology levels from profile terms

Returns

hash of term levels (Key: level; Value: array of term IDs)



1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
# File 'lib/semtools/ontology.rb', line 1713

# Restrict the ontology levels dictionary to the terms used in the stored profiles.
# ===== Parameters
# +uniq+:: if true each term appears once per level; if false it is repeated as many times as it appears in the profiles
# ===== Returns
# hash of term levels (Key: level; Value: array of term IDs). Levels without profile terms are omitted
def get_ontology_levels_from_profiles(uniq = true) # TODO: remove uniq and check dependencies
  profiles_terms = @profiles.values.flatten
  profiles_terms.uniq! if uniq
  # Times each term has to be repeated
  occurrences = Hash.new(0)
  profiles_terms.each { |term| occurrences[term] += 1 }
  levels_filtered = {}
  @dicts[:level][:byTerm].each do |level, terms|
    repeated = terms.map { |t| profiles_terms.include?(t) ? Array.new(occurrences[t], t) : nil }.flatten.compact
    levels_filtered[level] = repeated unless repeated.empty?
  end
  return levels_filtered
end

#get_profile(id) ⇒ Object

Returns

profiles assigned to a given ID

Parameters
id

profile ID

Return

specific profile or nil if it’s not stored



1324
1325
1326
# File 'lib/semtools/ontology.rb', line 1324

# Looks up a stored profile
# ===== Parameters
# +id+:: profile ID
# ===== Returns
# the value stored in @profiles under +id+, or nil if it is not stored
def get_profile(id)
  @profiles[id]
end

#get_profile_length_at_percentile(perc = 50, increasing_sort: false) ⇒ Object

Calculates profiles sizes and returns size assigned to percentile given

Parameters
perc

percentile to be returned

increasing_sort

flag to indicate if sizes order must be increasing. Default: false

Returns

values assigned to percentile asked



1356
1357
1358
1359
1360
1361
1362
1363
# File 'lib/semtools/ontology.rb', line 1356

# Calculates profiles sizes and returns the size found at the given percentile
# ===== Parameters
# +perc+:: percentile to be returned
# +increasing_sort+:: if true sizes are ordered increasingly, otherwise decreasingly. Default: false
# ===== Returns
# profile size located at the percentile position (nil when the position is out of range)
def get_profile_length_at_percentile(perc=50, increasing_sort: false)
  sizes = get_profiles_sizes.sort
  sizes.reverse! unless increasing_sort
  # Take the length which does not overpass the selected percentile
  raw_position = (perc * (sizes.length - 1)).fdiv(100) - 0.5
  # Negative positions (caused by the literal calc) fall back to the first element
  position = [raw_position.round, 0].max
  sizes[position]
end

#get_profile_mean_IC(prof, ic_type: :resnik, zhou_k: 0.5) ⇒ Object

Calculates mean IC of a given profile

Parameters
prof

profile to be checked

ic_type

ic_type to be used

zhou_k

special coefficient for Zhou IC method

Returns

mean IC for a given profile



1558
1559
1560
# File 'lib/semtools/ontology.rb', line 1558

# Calculates mean IC of a given profile
# ===== Parameters
# +prof+:: profile (array of terms) to be checked
# +ic_type+:: ic_type to be used
# +zhou_k+:: coefficient forwarded to get_IC (used by the Zhou IC method)
# ===== Returns
# sum of the terms ICs divided by the profile length
def get_profile_mean_IC(prof, ic_type: :resnik, zhou_k: 0.5)
  term_ics = prof.map { |term| get_IC(term, type: ic_type, zhou_k: zhou_k) }
  total = 0
  term_ics.each { |ic| total += ic }
  total.fdiv(prof.length)
end

#get_profiles_mean_size(round_digits: 4) ⇒ Object

Returns

mean size of stored profiles

Parameters
round_digits

number of digits to round result. Default: 4

Returns

mean size of stored profiles



1344
1345
1346
1347
# File 'lib/semtools/ontology.rb', line 1344

# Calculates the mean size of stored profiles
# ===== Parameters
# +round_digits+:: number of digits to round result. Default: 4
# ===== Returns
# mean size of stored profiles
def get_profiles_mean_size(round_digits: 4)
  total_terms = 0
  get_profiles_sizes.each { |size| total_terms += size }
  mean = total_terms.fdiv(@profiles.length)
  mean.round(round_digits)
end

#get_profiles_resnik_dual_ICsObject

Calculates resnik ontology, and resnik observed mean ICs for all profiles stored

Returns

two hashes with Profiles and IC calculated for resnik and observed resnik respectively



1566
1567
1568
1569
1570
1571
1572
1573
1574
# File 'lib/semtools/ontology.rb', line 1566

# Calculates :resnik and :resnik_observed mean ICs for all profiles stored
# ===== Returns
# two hashes (Profile ID => mean IC), for :resnik and :resnik_observed respectively
def get_profiles_resnik_dual_ICs
  by_structure = {}
  by_observation = {}
  # Single pass so both IC types are requested per profile, one after the other
  @profiles.each_pair do |profile_id, profile_terms|
    by_structure[profile_id] = get_profile_mean_IC(profile_terms, ic_type: :resnik)
    by_observation[profile_id] = get_profile_mean_IC(profile_terms, ic_type: :resnik_observed)
  end
  [by_structure, by_observation]
end

#get_profiles_sizesObject

Returns

an array of sizes for all stored profiles

Return

array of profile sizes



1333
1334
1335
# File 'lib/semtools/ontology.rb', line 1333

# Measures every stored profile
# ===== Returns
# array of profile sizes, in the iteration order of @profiles
def get_profiles_sizes()
  @profiles.values.map(&:length)
end

#get_profiles_terms_frequency(ratio: true, literal: true, asArray: true, translate: true) ⇒ Object

Calculates frequencies of stored profiles terms

Parameters
ratio

if true, frequencies will be returned as ratios between 0 and 1.

literal

if true, literal terms will be used to calculate frequencies instead translate alternative terms

asArray

used to transform returned structure format from hash of Term-Frequency to an array of tuples [Term, Frequency]

translate

if true, term IDs will be translated to their names (using translate_id) whenever a translation exists

Returns

stored profiles terms frequencies



1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
# File 'lib/semtools/ontology.rb', line 1439

# Calculates frequencies of the terms used by stored profiles
# ===== Parameters
# +ratio+:: if true, counts are divided by the number of stored profiles
# +literal+:: if true, terms are counted exactly as they appear in @profiles;
#             if false, the :observed_freq values stored in @meta are used instead
# +asArray+:: if true, result is an array of [Term, Frequency] sorted by decreasing frequency;
#             if false, a hash Term => Frequency
# +translate+:: if true, terms are replaced by the result of translate_id when it is not nil
# ===== Returns
# stored profiles terms frequencies
def get_profiles_terms_frequency(ratio: true, literal: true, asArray: true, translate: true)
  n_profiles = @profiles.length
  if literal
    # Count every occurrence of each term over all profiles
    freqs = {}
    @profiles.each do |id, terms|
      terms.each do |literalTerm|
        if freqs.include?(literalTerm)
          freqs[literalTerm] += 1
        else
          freqs[literalTerm] = 1
        end
      end
    end
    if (ratio || translate)
      # Iterate over a snapshot of the keys because the hash is rewritten inside the loop
      aux_keys = freqs.keys
      aux_keys.each do |term| 
        freqs[term] = freqs[term].fdiv(n_profiles) if ratio
        if translate
          tr = self.translate_id(term)
          # Re-key the entry under its translation; an existing entry with the same key is overwritten
          freqs[tr] = freqs.delete(term) if !tr.nil?
        end
      end
    end
    if asArray
      freqs = freqs.map{|term, freq| [term, freq]}
      # Decreasing frequency order
      freqs.sort!{|h1, h2| h2[1] <=> h1[1]}
    end
  else # Freqs translating alternatives
    # Only terms with a positive observed frequency in @meta are reported
    freqs = @meta.select{|id, freqs| freqs[:observed_freq] > 0}.map{|id, freqs| [id, ratio ? freqs[:observed_freq].fdiv(n_profiles) : freqs[:observed_freq]]}
    freqs = freqs.to_h if !asArray
    if translate
      # map always yields an array of pairs here, even when freqs was converted to a hash above
      freqs = freqs.map do |term, freq|
        tr = self.translate_id(term)
        tr.nil? ? [term, freq] : [tr, freq]
      end
    end
    if asArray
      freqs = freqs.map{|term, freq| [term, freq]}
      # Decreasing frequency order
      freqs.sort!{|h1, h2| h2[1] <=> h1[1]}
    else
      freqs = freqs.to_h
    end
  end
  return freqs
end

#get_similarity(termA, termB, type: :resnik, ic_type: :resnik) ⇒ Object

Calculate similarity between two given terms

Parameters
termsA

to be compared

termsB

to be compared

type

similitude formula to be used

ic_type

IC formula to be used

Returns

the similarity between both terms, or nil if get_ICMICA returns nil for the given pair

Raises:

  • (ArgumentError)


842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
# File 'lib/semtools/ontology.rb', line 842

# Calculate similarity between two given terms
# ===== Parameters
# +termA+:: term to be compared
# +termB+:: term to be compared
# +type+:: similitude formula to be used. Must be listed in @@allowed_calcs[:sims]
# +ic_type+:: IC formula to be used
# ===== Returns
# the calculated value, or nil when get_ICMICA returns nil for the pair
# ===== Raises
# ArgumentError if +type+ is not an allowed similarity
def get_similarity(termA, termB, type: :resnik, ic_type: :resnik)
  # Check
  unless @@allowed_calcs[:sims].include?(type)
    raise ArgumentError, "SIM type specified (#{type}) is not allowed"
  end
  # Launch comparisons
  mica_ic = get_ICMICA(termA, termB, ic_type)
  return nil if mica_ic.nil?

  case type
  when :resnik
    mica_ic
  when :lin
    ic_sum = get_IC(termA, type: ic_type) + get_IC(termB, type: ic_type)
    (2.0 * mica_ic).fdiv(ic_sum)
  when :jiang_conrath # This is not a similarity, this is a dissimilarity (distance)
    ic_sum = get_IC(termA, type: ic_type) + get_IC(termB, type: ic_type)
    ic_sum - (2.0 * mica_ic)
  end
end

#get_structural_frequency(term) ⇒ Object

Gets the structural frequency of a given term

Parameters
term

to be checked

Returns

structural frequency of given term or nil if term is not allowed



1416
1417
1418
# File 'lib/semtools/ontology.rb', line 1416

# Gets the structural frequency of a given term
# ===== Parameters
# +term+:: to be checked
# ===== Returns
# whatever get_frequency returns for +term+ with type :struct_freq
def get_structural_frequency(term)
  get_frequency(term, type: :struct_freq)
end

#get_term_level(term) ⇒ Object

Gets ontology level of a specific term

Returns

Term level



1705
1706
1707
# File 'lib/semtools/ontology.rb', line 1705

# Gets ontology level of a specific term
# ===== Parameters
# +term+:: to be checked
# ===== Returns
# value stored for +term+ in the :byValue side of the :level dictionary (nil if absent)
def get_term_level(term)
  levels_by_value = @dicts[:level][:byValue]
  levels_by_value[term]
end

#get_term_linked_profiles(term) ⇒ Object

Get related profiles to a given term

Parameters
term

to be checked

Returns

profiles which contains given term



1764
1765
1766
# File 'lib/semtools/ontology.rb', line 1764

# Get related profiles to a given term
# ===== Parameters
# +term+:: to be checked
# ===== Returns
# entry stored in @profilesDict for +term+ (nil if the term is not registered)
def get_term_linked_profiles(term)
  @profilesDict[term]
end

#get_terms_linked_profilesObject

Gets profiles dictionary calculated

Return

profiles dictionary (clone)



1754
1755
1756
# File 'lib/semtools/ontology.rb', line 1754

# Gets profiles dictionary calculated
# ===== Returns
# shallow clone of @profilesDict
def get_terms_linked_profiles
  snapshot = @profilesDict.clone
  snapshot
end

#is_alternative?(term) ⇒ Boolean

Check if a term given is marked as alternative

Returns:

  • (Boolean)


1621
1622
1623
# File 'lib/semtools/ontology.rb', line 1621

# Check if a term given is marked as alternative
# ===== Parameters
# +term+:: to be checked
# ===== Returns
# true if +term+ is a key of @alternatives_index, false otherwise
def is_alternative?(term)
  @alternatives_index.key?(term)
end

#is_number?(string) ⇒ Boolean

Returns:

  • (Boolean)


908
909
910
# File 'lib/semtools/ontology.rb', line 908

# Checks whether a value can be converted with Kernel#Float
# ===== Parameters
# +string+:: value to be checked
# ===== Returns
# true if Float(string) succeeds, false if it raises a StandardError
def is_number?(string)
  Float(string)
  true
rescue StandardError
  false
end

#is_obsolete?(term) ⇒ Boolean

Check if a term given is marked as obsolete

Returns:

  • (Boolean)


1616
1617
1618
# File 'lib/semtools/ontology.rb', line 1616

# Check if a term given is marked as obsolete
# ===== Parameters
# +term+:: to be checked
# ===== Returns
# true if +term+ is a key of @obsoletes_index, false otherwise
def is_obsolete?(term)
  @obsoletes_index.key?(term)
end

#is_removable(id) ⇒ Object

Check if a given ID is a removable (blacklist) term. DEPRECATED use is_removable? instead

Parameters
id

to be checked

Returns

true if given term is a removable (blacklist) term or false in other cases



1973
1974
1975
1976
# File 'lib/semtools/ontology.rb', line 1973

# Check if a given ID is a removable (blacklist) term. DEPRECATED: use is_removable? instead
# ===== Parameters
# +id+:: to be checked (converted with to_sym before the lookup)
# ===== Returns
# true if given term is included in @removable_terms, false in other cases
def is_removable(id)
  # Emit the deprecation notice on every call
  warn "[DEPRECATION] `is_removable` is deprecated.  Please use `is_removable?` instead."
  term = id.to_sym
  @removable_terms.include?(term)
end

#is_removable?(id) ⇒ Boolean

Check if a given ID is a removable (blacklist) term

Parameters
id

to be checked

Returns

true if given term is a removable (blacklist) term or false in other cases

Returns:

  • (Boolean)


1983
1984
1985
# File 'lib/semtools/ontology.rb', line 1983

# Check if a given ID is a removable (blacklist) term
# ===== Parameters
# +id+:: to be checked (converted with to_sym before the lookup)
# ===== Returns
# true if given term is included in @removable_terms, false in other cases
def is_removable?(id)
  term = id.to_sym
  @removable_terms.include?(term)
end

#load(file, build: true) ⇒ Object

Method used to load information stored into an OBO file and store it into this object. If a file is specified by input parameter, current @file value is updated

Parameters
file

optional file to update object stored file



866
867
868
869
870
871
872
873
# File 'lib/semtools/ontology.rb', line 866

# Loads the information of an OBO file into this object
# ===== Parameters
# +file+:: OBO file handed to the class method load_obo
# +build+:: if true, build_index is launched after loading. Default: true
def load(file, build: true)
  # load_obo answers three values; the first one is not used here
  _unused, @header, @stanzas = self.class.load_obo(file)
  # Drop removable terms before any index is built
  remove_removable
  build_index if build
end

#load_item_relations_to_terms(relations, remove_old_relations = false, expand = false) ⇒ Object

Store specific relations hash given into ITEMS structure

Parameters
relations

to be stored

remove_old_relations

substitute ITEMS structure instead of merge new relations

expand

if true, already stored keys will be updated with the unique union of both sets



1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
# File 'lib/semtools/ontology.rb', line 1789

# Store specific relations hash given into ITEMS structure
# ===== Parameters
# +relations+:: hash (Term => items) to be stored
# +remove_old_relations+:: if true, ITEMS structure is emptied before storing the new relations
# +expand+:: if true, already stored keys will be updated with the unique union of both sets
def load_item_relations_to_terms(relations, remove_old_relations = false, expand = false)
  @items = {} if remove_old_relations
  # Terms unknown to the ontology are reported but still stored
  unknown_terms = relations.any? { |term, _items| !@stanzas[:terms].include?(term) }
  warn('Some terms specified are not stored into this ontology. These not correct terms will be stored too') if unknown_terms
  unless remove_old_relations || expand
    overwritten = relations.any? { |term, _items| @items.include?(term) }
    warn('Some terms given are already stored. Stored version will be replaced') if overwritten
  end
  if expand
    relations.each do |term, new_items|
      @items[term] = @items.key?(term) ? (@items[term] + new_items).uniq : new_items
    end
  else
    @items.merge!(relations)
  end
end

#load_profiles(profiles, calc_metadata: true, reset_stored: false, substitute: false) ⇒ Object

Method used to store a set of profiles

Parameters
profiles

array/hash of profiles to be stored. If it’s an array, numerical IDs will be assigned from the array index, starting at 0

calc_metadata

if true, launch calc_profiles_dictionary process

reset_stored

if true, remove already stored profiles

substitute

substitute flag (as in check_ids), forwarded to add_profile



1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
# File 'lib/semtools/ontology.rb', line 1286

# Method used to store a set of profiles
# ===== Parameters
# +profiles+:: array/hash of profiles to be stored. If it's an array, the array index (starting at 0)
#              is used as profile ID and every item is converted with to_sym
# +calc_metadata+:: if true, launch calc_profiles_dictionary process
# +reset_stored+:: if true, remove already stored profiles
# +substitute+:: flag forwarded to add_profile
def load_profiles(profiles, calc_metadata: true, reset_stored: false, substitute: false)
  reset_profiles if reset_stored
  # Check
  if profiles.kind_of?(Array)
    profiles.each_with_index do |items, i|
      add_profile(i, items.map { |item| item.to_sym }, substitute: substitute)
    end
  else # Hash
    if profiles.keys.any? { |id| @profiles.include?(id) }
      warn('Some profiles given are already stored. Stored version will be replaced')
    end
    profiles.each { |id, prof| add_profile(id, prof, substitute: substitute) }
  end

  add_observed_terms_from_profiles(reset: true)

  # The dictionary is only rebuilt on demand; the flag used to be ignored
  calc_profiles_dictionary if calc_metadata
end

#parentals_per_profileObject

Calculates number of ancestors present (redundant) in each profile stored

Returns

array of parentals for each profile



1544
1545
1546
1547
1548
# File 'lib/semtools/ontology.rb', line 1544

# Calculates how many terms of each stored profile are dropped by clean_profiles
# (called with remove_alternatives: false)
# ===== Returns
# array with, for each profile, its original length minus its cleaned length
def parentals_per_profile
  cleaned = clean_profiles(remove_alternatives: false)
  @profiles.map do |profile_id, profile_terms|
    profile_terms.length - cleaned[profile_id].length
  end
end

#profile_names(prof) ⇒ Object

Translate a given profile to terms names

Parameters
prof

array of terms to be translated

Returns

array of translated terms. Can include nils if some IDs are not allowed



1371
1372
1373
# File 'lib/semtools/ontology.rb', line 1371

# Translate a given profile to terms names
# ===== Parameters
# +prof+:: array of terms to be translated
# ===== Returns
# array with the translate_id result of every term. Can include nils if some IDs are not translated
def profile_names(prof)
  names = []
  prof.each { |term| names << translate_id(term) }
  names
end

#read(file) ⇒ Object

Read a JSON file with an OBO_Handler object stored

Parameters
file

with object info

Return

OBO_Handler internal fields



918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
# File 'lib/semtools/ontology.rb', line 918

# Read a JSON file with an OBO_Handler object stored and load its fields into this object
# ===== Parameters
# +file+:: path of the JSON file with object info
# ===== Notes
# JSON does not keep symbols nor numeric keys, so IDs and tags are converted back after parsing
def read(file)
  # Read file (File.read opens and closes the handle by itself)
  jsonInfo = JSON.parse(File.read(file), :symbolize_names => true)
  # Pre-process (Symbolize some hashs values)
  jsonInfo[:stanzas][:terms].map{|id,info| self.class.symbolize_ids(info)} # STANZAS
  jsonInfo[:stanzas][:typedefs].map{|id,info| self.class.symbolize_ids(info)}
  jsonInfo[:stanzas][:instances].map{|id,info| self.class.symbolize_ids(info)}
  jsonInfo[:alternatives_index] = jsonInfo[:alternatives_index].map{|id,value| [id, value.to_sym]}.to_h 
  jsonInfo[:ancestors_index].map {|id,family_arr| family_arr.map!{|item| item.to_sym}}
  jsonInfo[:descendants_index].map {|id,family_arr| family_arr.map!{|item| item.to_sym}}
  jsonInfo[:obsoletes_index] = jsonInfo[:obsoletes_index].map{|id,value| [id, value.to_sym]}.to_h
  jsonInfo[:dicts] = jsonInfo[:dicts].each do |flag, dictionaries|
    # Special case: byTerm
    dictionaries[:byTerm] = dictionaries[:byTerm].map do |term, value| 
      if !term.to_s.scan(/\A[-+]?[0-9]*\.?[0-9]+\Z/).empty?  # Numeric dictionary
        [term.to_s.to_i, value.map{|term| term.to_sym}]
      elsif value.is_a? Numeric # Numeric dictionary
        [term.to_sym, value]
      elsif value.kind_of?(Array) && flag == :is_a
        [term.to_sym, value.map{|v| v.to_sym}]
      else
        [term.to_sym, value]
      end
    end
    dictionaries[:byTerm] = dictionaries[:byTerm].to_h
    # By value
    dictionaries[:byValue] = dictionaries[:byValue].map do |value, term| 
      if value.is_a? Numeric # Numeric dictionary
        [value, term.to_sym]
      elsif term.is_a? Numeric # Numeric dictionary
        [value.to_s.to_sym, term]
      elsif flag == :is_a
        [value.to_sym, term.map{|v| v.to_sym}]
      elsif term.kind_of?(Array)
        [value.to_sym, term.map{|t| t.to_sym}]
      else
        [value.to_s, term.to_sym]
      end
    end
    dictionaries[:byValue] = dictionaries[:byValue].to_h
  end 
  jsonInfo[:profiles].map{|id,terms| terms.map!{|term| term.to_sym}}
  # Keys which look like numbers are restored as integer profile IDs
  jsonInfo[:profiles].keys.map{|id| jsonInfo[:profiles][id.to_s.to_i] = jsonInfo[:profiles].delete(id) if self.is_number?(id.to_s)}
  # Numeric profile IDs are kept as they are; the remaining ones are symbolized
  jsonInfo[:profilesDict].map{|term,ids| ids.map!{|id| id.is_a?(Numeric) ? id : id.to_sym}}
  jsonInfo[:removable_terms] = jsonInfo[:removable_terms].map{|term| term.to_sym}
  jsonInfo[:special_tags] = jsonInfo[:special_tags].each do |k, v|
    if v.kind_of?(Array)
      jsonInfo[:special_tags][k] = v.map{|tag| tag.to_sym}
    else
      jsonInfo[:special_tags][k] = v.to_sym
    end
  end
  jsonInfo[:items].each{|k,v| jsonInfo[:items][k] = v.map{|item| item.to_sym}}
  jsonInfo[:term_paths].each{|term,info| jsonInfo[:term_paths][term][:paths] = info[:paths].map{|path| path.map{|t| t.to_sym}}}
  # Store info
  @header = jsonInfo[:header]
  @stanzas = jsonInfo[:stanzas]
  @ancestors_index = jsonInfo[:ancestors_index]
  @descendants_index = jsonInfo[:descendants_index]
  @alternatives_index = jsonInfo[:alternatives_index]
  @obsoletes_index = jsonInfo[:obsoletes_index]
  @structureType = jsonInfo[:structureType].to_sym
  @ics = jsonInfo[:ics]
  @meta = jsonInfo[:meta]
  @special_tags = jsonInfo[:special_tags]
  @max_freqs = jsonInfo[:max_freqs]
  @dicts = jsonInfo[:dicts]
  @profiles = jsonInfo[:profiles]
  @profilesDict = jsonInfo[:profilesDict]
  @items = jsonInfo[:items]
  @removable_terms = jsonInfo[:removable_terms]
  @term_paths = jsonInfo[:term_paths]
end

#remove_alternatives_from_profile(prof) ⇒ Object

Remove alternative IDs if official ID is present. DOES NOT REMOVE synonyms or alternative IDs of the same official ID

Parameters
prof

array of terms to be checked

Returns

two arrays, first is the cleaned profile and second is the removed elements array



1503
1504
1505
1506
1507
# File 'lib/semtools/ontology.rb', line 1503

# Remove alternative IDs if official ID is present. DOES NOT REMOVE synonyms or alternative IDs of the same official ID
# ===== Parameters
# +prof+:: array of terms to be checked
# ===== Returns
# two arrays, first is the cleaned profile and second is the removed elements array
def remove_alternatives_from_profile(prof)
  # An alternative is redundant only when the ID it points to is also in the profile
  redundant = prof.select do |term|
    @alternatives_index.include?(term) && prof.include?(@alternatives_index[term])
  end
  cleaned = prof - redundant
  [cleaned, redundant]
end

#remove_ancestors_from_profile(prof) ⇒ Object

Clean a given profile returning cleaned set of terms and removed ancestors term.

Parameters
prof

array of terms to be checked

Returns

two arrays, first is the cleaned profile and second is the removed elements array



1491
1492
1493
1494
1495
# File 'lib/semtools/ontology.rb', line 1491

# Clean a given profile returning cleaned set of terms and removed ancestors term.
# ===== Parameters
# +prof+:: array of terms to be checked
# ===== Returns
# two arrays, first is the cleaned profile and second is the removed elements array
def remove_ancestors_from_profile(prof)
  per_term_ancestors = prof.map { |term| get_ancestors(term) }
  all_ancestors = per_term_ancestors.flatten.uniq
  # A term is redundant when it is an ancestor of any term of the same profile
  redundant = prof.select { |term| all_ancestors.include?(term) }
  cleaned = prof - redundant
  [cleaned, redundant]
end

#remove_removableObject



876
877
878
# File 'lib/semtools/ontology.rb', line 876

# Deletes every term listed in @removable_terms from the :terms stanzas
def remove_removable()
  # Nothing to do when no removable term is configured
  return if @removable_terms.empty?

  @removable_terms.each do |term_id|
    @stanzas[:terms].delete(term_id)
  end
end

#reset_profilesObject

Internal method used to remove already stored profiles and restore observed frequencies



1309
1310
1311
1312
1313
1314
1315
# File 'lib/semtools/ontology.rb', line 1309

# Internal method used to remove already stored profiles and restore observed frequencies
def reset_profiles
  # Clean profiles storage
  @profiles = {}
  # Reset frequency observed of every term and its stored maximum
  @meta.each_value { |term_info| term_info[:observed_freq] = 0 }
  @max_freqs[:observed_freq] = 0
end

#set_items_from_dict(dictID, remove_old_relations = false) ⇒ Object

Assign a dictionary already calculated as a items set.

Parameters
dictID

dictionary ID to be stored (:byTerm will be used)



1816
1817
1818
1819
1820
1821
1822
1823
# File 'lib/semtools/ontology.rb', line 1816

# Assign a dictionary already calculated as a items set.
# ===== Parameters
# +dictID+:: dictionary ID to be stored (:byTerm will be used)
# +remove_old_relations+:: if true, ITEMS structure is emptied before adding the dictionary
def set_items_from_dict(dictID, remove_old_relations = false)
  @items = {} if remove_old_relations
  if @dicts.key?(dictID)
    # merge! updates @items in place; the non-destructive merge discarded its result
    @items.merge!(@dicts[dictID][:byTerm])
  else
    warn('Specified ID is not calculated. Dict will not be added as a items set')
  end
end

#translate(toTranslate, tag, byValue: true) ⇒ Object

Translate a given value using an already calculated dictionary

Parameters
toTranslate

value to be translated using the dictionary

tag

used to generate the dictionary

byValue

boolean flag to indicate if dictionary must be used values as keys or terms as keys. Default: values as keys = true

Return

translation



1153
1154
1155
1156
1157
# File 'lib/semtools/ontology.rb', line 1153

# Translate a given value using an already calculated dictionary
# ===== Parameters
# +toTranslate+:: value to be translated using dictionary
# +tag+:: used to generate the dictionary
# +byValue+:: if true the :byValue side (values as keys) is used; if false the :byTerm side is used
#             and +toTranslate+ is first passed through get_main_id. Default: true
# ===== Returns
# translation, or nil if the dictionary has no entry
def translate(toTranslate, tag, byValue: true)
  if byValue
    @dicts[tag][:byValue][toTranslate]
  else
    @dicts[tag][:byTerm][get_main_id(toTranslate)]
  end
end

#translate_id(id) ⇒ Object

Translates a given ID to its assigned name

Parameters
id

to be translated

Return

main name or nil if it’s not included into this ontology



1197
1198
1199
1200
# File 'lib/semtools/ontology.rb', line 1197

# Translates a given ID to its assigned name
# ===== Parameters
# +id+:: to be translated
# ===== Returns
# first element of the :name translation, or nil if there is no translation
def translate_id(id)
  names = translate(id, :name, byValue: false)
  return nil if names.nil?

  names.first
end

#translate_ids(ids) ⇒ Object

Translates several IDs and returns translations and not allowed IDs list

Parameters
ids

to be translated

Return

two arrays with translations and names which couldn’t be translated respectively



1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
# File 'lib/semtools/ontology.rb', line 1208

# Translates several IDs and returns translations and the list of IDs which couldn't be translated
# ===== Parameters
# +ids+:: to be translated (each one is converted with to_sym before translating)
# ===== Returns
# two arrays with translations and IDs which couldn't be translated respectively
def translate_ids(ids)
  translated = []
  rejected = []
  ids.each do |term_id|
    tr = translate_id(term_id.to_sym)
    if tr.nil?
      # Keep the offending ID as given (the translation itself is nil here)
      rejected << term_id
    else
      translated << tr
    end
  end
  return translated, rejected
end

#translate_name(name) ⇒ Object

Translate a name given

Parameters
name

to be translated

Return

translated name or nil if it’s not stored into this ontology



1165
1166
1167
1168
1169
# File 'lib/semtools/ontology.rb', line 1165

# Translate a name given
# ===== Parameters
# +name+:: to be translated
# ===== Returns
# translation found in the :name dictionary or, if that is nil, in the :synonym dictionary
def translate_name(name)
  by_name = translate(name, :name)
  # Synonyms are only consulted when the name dictionary has no entry
  by_name.nil? ? translate(name, :synonym) : by_name
end

#translate_names(names) ⇒ Object

Translate several names and return translations and a list of names which couldn’t be translated

Parameters
names

array to be translated

Return

two arrays with translations and names which couldn’t be translated respectively



1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
# File 'lib/semtools/ontology.rb', line 1177

# Translate several names and return translations and a list of names which couldn't be translated
# ===== Parameters
# +names+:: array to be translated
# ===== Returns
# two arrays with translations and names which couldn't be translated respectively
def translate_names(names)
  # Pair every name with its translation so each name is translated exactly once
  attempts = names.map { |name| [name, translate_name(name)] }
  hits, misses = attempts.partition { |_name, tr| !tr.nil? }
  [hits.map(&:last), misses.map(&:first)]
end

#translate_profiles_ids(profs = [], asArray: true) ⇒ Object

Translates a set of profiles into their sets of term names

Parameters
profs

array of profiles

asArray

flag to indicate if results must be returned as: true => an array of tuples [ProfID, ArrayOdNames] or ; false => hashs of translations

Returns

translated profiles



1382
1383
1384
1385
1386
1387
# File 'lib/semtools/ontology.rb', line 1382

# Translates a set of profiles into their sets of term names
# ===== Parameters
# +profs+:: array or hash of profiles. When empty, stored profiles are used.
#           Arrays are keyed by their index (starting at 0)
# +asArray+:: true => an array with the names of each profile; false => hash (ProfID => names)
# ===== Returns
# translated profiles
def translate_profiles_ids(profs = [], asArray: true)
  profs = @profiles if profs.empty?
  profs = (0...profs.length).zip(profs).to_h if profs.kind_of?(Array)
  names_by_profile = {}
  profs.each { |id, terms| names_by_profile[id] = profile_names(terms) }
  asArray ? names_by_profile.values : names_by_profile
end

#write(file) ⇒ Object

Exports an OBO_Handler object in json format

Parameters
file

where info will be stored



884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
# File 'lib/semtools/ontology.rb', line 884

# Exports an OBO_Handler object in json format
# ===== Parameters
# +file+:: path where info will be stored (created or truncated)
def write(file)
  # Take object stored info
  snapshot = {
    header: @header,
    stanzas: @stanzas,
    ancestors_index: @ancestors_index,
    descendants_index: @descendants_index,
    alternatives_index: @alternatives_index,
    obsoletes_index: @obsoletes_index,
    structureType: @structureType,
    ics: @ics,
    meta: @meta,
    special_tags: @special_tags,
    max_freqs: @max_freqs,
    dicts: @dicts,
    profiles: @profiles,
    profilesDict: @profilesDict,
    items: @items,
    removable_terms: @removable_terms,
    term_paths: @term_paths
  }
  # Convert to JSON format & write
  File.write(file, snapshot.to_json)
end