Class: YoutubeCaptions
- Inherits:
-
Object
- Object
- YoutubeCaptions
- Includes:
- HTTParty
- Defined in:
- lib/youtube-captions.rb
Constant Summary collapse
- TRANSLATABLE_REGEX =
/({"captionTracks":.*"isTranslatable":(true|false)}\])/
Instance Attribute Summary collapse
-
#id ⇒ Object
readonly
Returns the value of attribute id.
-
#lang ⇒ Object
readonly
Returns the value of attribute lang.
Instance Method Summary collapse
- #call ⇒ Object
-
#initialize(id:, lang: nil) ⇒ YoutubeCaptions
constructor
A new instance of YoutubeCaptions.
Constructor Details
#initialize(id:, lang: nil) ⇒ YoutubeCaptions
Returns a new instance of YoutubeCaptions.
8 9 10 11 |
# File 'lib/youtube-captions.rb', line 8

# Builds a caption fetcher for a single video.
#
# @param id [Object] video identifier; #call interpolates it into the
#   "watch?v=" URL (presumably a String, not enforced here)
# @param lang [Object, nil] caption language code that #call compares against
#   each track's "vssId" (".<lang>" first, then "a.<lang>"); nil leaves the
#   choice to #call's default
# @return [YoutubeCaptions] a new instance of YoutubeCaptions
def initialize(id:, lang: nil)
  @id, @lang = id, lang
end
Instance Attribute Details
#id ⇒ Object (readonly)
Returns the value of attribute id.
7 8 9 |
# File 'lib/youtube-captions.rb', line 7

# Read-only accessor for the video identifier given to the constructor.
#
# @return [Object] the value of the @id instance variable
def id
  @id
end
#lang ⇒ Object (readonly)
Returns the value of attribute lang.
7 8 9 |
# File 'lib/youtube-captions.rb', line 7

# Read-only accessor for the caption language given to the constructor.
#
# @return [Object, nil] the value of the @lang instance variable
def lang
  @lang
end
Instance Method Details
#call ⇒ Object
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
# File 'lib/youtube-captions.rb', line 15

# Downloads the watch page for +id+, picks one caption track, fetches that
# track's transcript XML and returns the caption text as a single string.
#
# Track choice: when +lang+ is set, the track whose "vssId" is ".<lang>" is
# preferred, then "a.<lang>". When +lang+ is nil or blank, ".en" is tried,
# then "a.en", then the first listed track.
#
# @return [String] decoded caption segments joined by single spaces
# @raise [StandardError] "There are no captions" when the page has no
#   captionTracks payload, or no usable track exists for the default choice
# @raise [ArgumentError] "Lang no available" when +lang+ is set but no
#   matching track with a "baseUrl" exists
def call
  youtube_html = self.class.get("https://www.youtube.com/watch?v=#{id}")
  match_data = youtube_html.match(TRANSLATABLE_REGEX)
  raise StandardError, "There are no captions" unless match_data

  # The capture ends at the "]" that closes the track array, so the enclosing
  # JSON object is still open; append "}" to make it parseable.
  caption_tracks = JSON.parse("#{match_data[1]}}")["captionTracks"]
  subtitle = select_caption_track(caption_tracks)

  transcript_html = self.class.get(subtitle["baseUrl"])
  extract_transcript_text(transcript_html.to_s)
end

# Picks the caption track to download, following the rules documented on #call.
private def select_caption_track(caption_tracks)
  by_vss_id = ->(vss_id) { caption_tracks.find { |track| track["vssId"] == vss_id } }

  # Plain-Ruby blank check (nil, "" or whitespace only) so no ActiveSupport
  # `present?` is required.
  if lang.to_s.strip.empty?
    track = by_vss_id.call(".en") || by_vss_id.call("a.en") || caption_tracks.first
    # Without this guard an empty track list surfaced as NoMethodError on nil.
    raise StandardError, "There are no captions" if track.nil? || !track["baseUrl"]
  else
    track = by_vss_id.call(".#{lang}") || by_vss_id.call("a.#{lang}")
    raise ArgumentError, "Lang no available" if track.nil? || !track["baseUrl"]
  end

  track
end

# Turns the transcript XML into plain text: one decoded string per <text>
# element, joined by single spaces.
private def extract_transcript_text(xml)
  segments = xml
    .gsub('<?xml version="1.0" encoding="utf-8" ?><transcript>', '')
    .gsub('</transcript>', '')
    .split('</text>')
    .reject { |segment| segment.strip.empty? } # drop whitespace-only leftovers

  decoded = segments.map do |segment|
    text = segment
      .gsub(/<text[^>]*>/, '')        # opening tag only; non-greedy on purpose so inline markup survives this step
      .gsub(/&amp;/i, '&')            # real Regexp: a String pattern would be matched literally and never fire
      .gsub(/<\/?[^>]+(>|$)/, '')     # strip any remaining inline markup tags
      .gsub(/&#(\d+);/) { [Regexp.last_match(1).to_i].pack('U*') }
    CGI.unescapeHTML(text)
  end

  decoded.join(" ")
end