Module: FeldtRuby::Statistics::Plotting
- Included in:
- RCommunicator
- Defined in:
- lib/feldtruby/statistics.rb
Overview
Plot data sets in R with ggplot2 and save the plots to files.
Constant Summary collapse
# Default parameters handed to the R graphics device of each supported
# output format. Keys are the format names (also used as file endings).
GfxFormatToGfxParams = {
  "pdf"  => {:width => 7, :height => 5, :paper => 'special'},
  "png"  => {:units => "cm", :width => 12, :height => 8},
  "tiff" => {:units => "cm", :width => 12, :height => 8},
}
Instance Method Summary collapse
- #density_tile2d(csvFilePath, xlabel, ylabel, title = "densitytile2d") ⇒ Object
- #filled_contour(csvFilePath, xlabel, ylabel, title = "filled.contour") ⇒ Object
- #gfx_device(format, width = nil, height = nil) ⇒ Object
- #ggplot2_setup_and_theme ⇒ Object
-
#hash_to_R_params(hash) ⇒ Object
Map a ruby hash of objects to parameters in R code/script.
- #hexbin_heatmap(csvFilePath, xlabel, ylabel, title = "heatmap", bins = 50) ⇒ Object
-
#load_csv_files_as_data(dataMap, columnName = nil) ⇒ Object
Data can be specified in two ways, either directly in Ruby arrays, or as strings with the path to a csv file to be loaded.
-
#overlaid_densities(dataMap, title = "Densities of distributions", datasetsName = "distribution", xlabel = "values", ylabel = "density") ⇒ Object
Overlaid density graph of the observations (sampled distributions) of each data series in dataMap.
-
#overlaid_densities_from_csv_files(columnName, csvFiles, title = "Densities of distributions", datasetsName = "distribution", xlabel = "values", ylabel = "density") ⇒ Object
Plot the densities of the data found in the column named columnName in the csv files in csvFiles.
- #plot_2dims(csvFilePath, plotCommand, xlabel, ylabel, title = "scatterplot") ⇒ Object
-
#save_graph(filename, width = nil, height = nil) ⇒ Object
Wrap a save_graph call around a block that draws a diagram to save that diagram to a file.
- #scatter_plot(csvFilePath, xlabel, ylabel, title = "scatterplot") ⇒ Object
- #smooth_scatter_plot(csvFilePath, xlabel, ylabel, title = "smoothscatter") ⇒ Object
Instance Method Details
#density_tile2d(csvFilePath, xlabel, ylabel, title = "densitytile2d") ⇒ Object
342 343 344 345 346 347 348 349 350 351 |
# File 'lib/feldtruby/statistics.rb', line 342
# Draw a 2D density tile plot of two columns of a csv file.
#
# xlabel and ylabel are interpolated into the ggplot aes() mapping (so they
# name the columns to plot) and are also forwarded, as strings, together
# with the title to plot_2dims.
def density_tile2d(csvFilePath, xlabel, ylabel, title = "densitytile2d")
  tile_layers = <<-EOS
    f <- ggplot(data, aes(x=#{xlabel}, y=#{ylabel}))
    f <- f + stat_density2d(geom="tile", aes(fill=..density..), contour=FALSE) + scale_fill_gradient(high="red", low="white")
  EOS

  x_text = xlabel.to_s
  y_text = ylabel.to_s
  plot_2dims(csvFilePath, tile_layers, x_text, y_text, title)
end
#filled_contour(csvFilePath, xlabel, ylabel, title = "filled.contour") ⇒ Object
258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 |
# File 'lib/feldtruby/statistics.rb', line 258
# Draw a filled contour plot of a 2D kernel density estimate (MASS::kde2d)
# over the columns xlabel and ylabel of the given csv file.
#
# The title and the axis labels are passed to subst_eval as the values for
# the _title_, _xlabel_ and _ylabel_ placeholders in the script.
def filled_contour(csvFilePath, xlabel, ylabel, title = "filled.contour")
  include_library "MASS"

  contour_script = <<-EOS
    data <- read.csv(#{csvFilePath.inspect})
    k <- with(data, MASS::kde2d(#{xlabel}, #{ylabel}))
    f <- filled.contour(k, color=topo.colors,
      plot.title=title(main = _title_),
      xlab=_xlabel_, ylab=_ylabel_)
    f
  EOS

  substitutions = {:title => title, :xlabel => xlabel.to_s, :ylabel => ylabel.to_s}
  subst_eval(contour_script, substitutions)
end
#gfx_device(format, width = nil, height = nil) ⇒ Object
219 220 221 222 223 224 225 226 227 228 229 230 231 |
# File 'lib/feldtruby/statistics.rb', line 219
# Build the R call that opens a graphics device for the given format.
#
# format - format name as String or Symbol; must be a key of
#          GfxFormatToGfxParams.
# width  - optional override of the format's default width.
# height - optional override of the format's default height.
#
# Returns the R code as a String, e.g. "png(height = ..., width = ...)".
# Raises ArgumentError if the format is unknown.
def gfx_device(format, width = nil, height = nil)
  format = format.to_s # If given as a symbol instead of a string

  unless GfxFormatToGfxParams.has_key?(format)
    raise ArgumentError.new("Don't now about gfx format #{format}")
  end

  # Work on a copy so that overrides never leak into the shared defaults.
  params = GfxFormatToGfxParams[format].dup
  params[:width] = width if width
  params[:height] = height if height

  "#{format}(#{hash_to_R_params(params)})"
end
#ggplot2_setup_and_theme ⇒ Object
381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 |
# File 'lib/feldtruby/statistics.rb', line 381
# Include the ggplot2 and reshape2 libraries and return the R commands that
# add the title, the axis labels and the common theme to a plot held in the
# R variable f. The _title_, _xlabel_ and _ylabel_ placeholders are left in
# the returned string for the caller to fill in.
def ggplot2_setup_and_theme
  ["ggplot2", "reshape2"].each { |library| include_library(library) }

  <<-EOS
    f <- f + ggtitle(_title_) + xlab(_xlabel_) + ylab(_ylabel_)
    f <- f + theme_bw()
    f <- f + theme(
      plot.title = element_text(face="bold", size=12),
      axis.title.x = element_text(face="bold", size=10),
      axis.title.y = element_text(face="bold", size=10)
    )
  EOS
end
#hash_to_R_params(hash) ⇒ Object
Map a ruby hash of objects to parameters in R code/script.
234 235 236 237 238 239 240 241 242 |
# File 'lib/feldtruby/statistics.rb', line 234
# Map a ruby hash of objects to a parameter list in R code/script.
#
# Keys are emitted in sorted order as "key = value" pairs separated by ", ";
# each value is converted with ruby_object_to_R_string.
def hash_to_R_params(hash)
  assignments = []
  hash.keys.sort.each do |name|
    r_value = ruby_object_to_R_string(hash[name])
    assignments << "#{name} = #{r_value}"
  end
  assignments.join(", ")
end
#hexbin_heatmap(csvFilePath, xlabel, ylabel, title = "heatmap", bins = 50) ⇒ Object
286 287 288 289 290 291 292 293 294 |
# File 'lib/feldtruby/statistics.rb', line 286
# Draw a hexagonally binned heatmap of two columns of a csv file.
#
# xlabel and ylabel name the columns to plot (they are interpolated into
# aes()) and double as axis labels; bins is forwarded to geom_hex.
def hexbin_heatmap(csvFilePath, xlabel, ylabel, title = "heatmap", bins = 50)
  include_library "grid"
  include_library "lattice"
  include_library "hexbin"

  plot_command = "f <- ggplot(data, aes(#{xlabel}, #{ylabel})) + geom_hex( bins = #{bins} )"

  # Hand the labels over as strings, like the other 2D plot methods do, so
  # that labels given as symbols are treated the same way everywhere.
  plot_2dims(csvFilePath, plot_command, xlabel.to_s, ylabel.to_s, title)
end
#load_csv_files_as_data(dataMap, columnName = nil) ⇒ Object
Data can be specified in two ways, either directly in Ruby arrays, or as strings with the path to a csv file to be loaded. In the latter case a column name must be given.
311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 |
# File 'lib/feldtruby/statistics.rb', line 311
# Build the R script that collects several data series into one data frame
# named "data".
#
# dataMap    - maps the name of each series to its source: either a Ruby
#              Array with the observations, or the path to a csv file.
# columnName - name of the csv column to read; required as soon as any
#              series is given as a csv path.
#
# Series are processed in sorted key order. Each one is first bound to an R
# variable d_<name>. The first column of the data frame is a running index
# sized after the first series.
#
# Returns the R script as a String.
# Raises ArgumentError if dataMap is empty, or if a csv path is given
# without a columnName.
def load_csv_files_as_data(dataMap, columnName = nil)
  keys = dataMap.keys.sort
  if keys.empty?
    raise ArgumentError, "dataMap must contain at least one data series"
  end

  load_statements = []
  columns = []

  keys.each do |key|
    value = dataMap[key]
    set_name = "d_#{key}"

    if Array === value
      load_statements << "#{set_name} <- #{ruby_object_to_R_string(value)};\n"
      columns << "#{key} = #{set_name}"
    else
      # Without a column name the generated R would read "d_x$", which is
      # not valid R, so fail early with a clear message instead.
      if columnName.nil?
        raise ArgumentError, "A column name must be given to load #{key} from a csv file"
      end
      load_statements << "#{set_name} <- read.csv(#{value.inspect});\n"
      columns << "#{key} = #{set_name}$#{columnName}"
    end
  end

  # In R, length() of a data frame is its number of columns, so the number
  # of observations of a csv-backed series must be taken with nrow().
  first_set = "d_#{keys.first}"
  size_function = (Array === dataMap[keys.first]) ? "length" : "nrow"
  index_column = "1:#{size_function}(#{first_set})"

  "#{load_statements.join}data <- data.frame(#{index_column}, #{columns.join(', ')});"
end
#overlaid_densities(dataMap, title = "Densities of distributions", datasetsName = "distribution", xlabel = "values", ylabel = "density") ⇒ Object
Overlaid density graph of the observations (sampled distributions) of each data series in dataMap. The dataMap maps the name of each data series to an array with its observations.
401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 |
# File 'lib/feldtruby/statistics.rb', line 401
# Overlaid density graph of the data series in dataMap, which maps the name
# of each series to an array with its observations.
#
# All series must have the same number of observations; otherwise an
# ArgumentError is raised. The series are put in one R data frame, melted
# on the index column, and drawn with geom_density filled per series.
def overlaid_densities(dataMap, title = "Densities of distributions", datasetsName = "distribution", xlabel = "values", ylabel = "density")
  series_sizes = dataMap.values.map(&:length).uniq
  raise ArgumentError, "Must have same cardinality" if series_sizes.length != 1

  row_count = series_sizes.first
  series_columns = hash_to_R_params(dataMap)

  density_script = <<-EOS
    df <- data.frame(index = (1:#{row_count}), #{series_columns})
    df.m <- melt(df, id = "index")
    names(df.m)[2] <- _datasetsName_
    f <- ggplot(df.m, aes(value, fill=#{datasetsName}))
    f <- f + geom_density(alpha = 0.2, size = 0.5) + scale_color_brewer()
    #{ggplot2_setup_and_theme()}
    f
  EOS

  substitutions = {
    :title => title,
    :datasetsName => datasetsName,
    :xlabel => xlabel,
    :ylabel => ylabel
  }
  subst_eval(density_script, substitutions)
end
#overlaid_densities_from_csv_files(columnName, csvFiles, title = "Densities of distributions", datasetsName = "distribution", xlabel = "values", ylabel = "density") ⇒ Object
Plot the densities of the data found in the column named columnName in the csv files in csvFiles.
428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 |
# File 'lib/feldtruby/statistics.rb', line 428
# Plot the densities of the data found in the column named columnName in
# the csv files in csvFiles.
#
# csvFiles is handed unchanged to load_csv_files_as_data, so it has to be a
# map from the name of each data series to the path of its csv file.
#
# The remaining parameters are passed to subst_eval as the values for the
# _title_, _datasetsName_, _xlabel_ and _ylabel_ placeholders; datasetsName
# is also interpolated into the fill aesthetic.
def overlaid_densities_from_csv_files(columnName, csvFiles, title = "Densities of distributions", datasetsName = "distribution", xlabel = "values", ylabel = "density")
  # The column name must be forwarded; the loader needs it for csv sources.
  load_csvs = load_csv_files_as_data(csvFiles, columnName)

  # load_csv_files_as_data leaves its result in the R variable "data" with
  # an unnamed running index as first column, while the melt step below
  # expects a data frame "df" with a column called "index".
  script = <<-EOS
    #{load_csvs}
    df <- data
    names(df)[1] <- "index"
    df.m <- melt(df, id = "index")
    names(df.m)[2] <- _datasetsName_
    f <- ggplot(df.m, aes(value, fill=#{datasetsName}))
    f <- f + geom_density(alpha = 0.2, size = 0.5) + scale_color_brewer()
    #{ggplot2_setup_and_theme()}
    f
  EOS

  subst_eval script, {:title => title, :datasetsName => datasetsName,
    :xlabel => xlabel, :ylabel => ylabel}
end
#plot_2dims(csvFilePath, plotCommand, xlabel, ylabel, title = "scatterplot") ⇒ Object
244 245 246 247 248 249 250 251 252 253 254 255 256 |
# File 'lib/feldtruby/statistics.rb', line 244
# Shared driver of the 2D plots: reads the csv file into the R variable
# data, runs plotCommand (which is expected to leave the plot in f), adds
# the commands from ggplot2_setup_and_theme, and evaluates the script with
# subst_eval, passing the title and axis labels as substitution values.
def plot_2dims(csvFilePath, plotCommand, xlabel, ylabel, title = "scatterplot")
  quoted_path = csvFilePath.inspect

  full_script = <<-EOS
    data <- read.csv(#{quoted_path})
    #{plotCommand}
    #{ggplot2_setup_and_theme()}
    f
  EOS

  substitutions = {:title => title, :xlabel => xlabel, :ylabel => ylabel}
  subst_eval(full_script, substitutions)
end
#save_graph(filename, width = nil, height = nil) ⇒ Object
Wrap a save_graph call around a block that draws a diagram to save that diagram to a file. The file type is given by the file extension of the file name.
362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 |
# File 'lib/feldtruby/statistics.rb', line 362
# Wrap a save_graph call around a block that draws a diagram to save that
# diagram to a file. The graphics format is taken from the file ending of
# filename and must be a key of GfxFormatToGfxParams.
#
# filename - path of the file to write.
# width    - optional override of the format's default width.
# height   - optional override of the format's default height.
#
# Returns the result of evaluating dev.off() in R.
# Raises RuntimeError if the file ending is not a known graphics format.
def save_graph(filename, width = nil, height = nil)
  file_ending = filename.split(".").last
  raise "Don't now about graphics format #{file_ending}" unless GfxFormatToGfxParams.has_key?(file_ending)

  # Copy the defaults so that the overrides do not change the constant.
  params = GfxFormatToGfxParams[file_ending].clone
  params[:width] = width if width
  params[:height] = height if height

  RC.eval("#{file_ending}(#{filename.inspect}, #{hash_to_R_params(params)})")

  # Close the device even when the drawing block raises; otherwise the
  # device stays open and keeps capturing later plots.
  # Just be sure not to nest these save_graph calls within each other...
  begin
    yield()
  ensure
    closed = RC.eval("dev.off()")
  end
  closed
end
#scatter_plot(csvFilePath, xlabel, ylabel, title = "scatterplot") ⇒ Object
296 297 298 299 300 301 302 303 304 305 306 |
# File 'lib/feldtruby/statistics.rb', line 296
# Draw a scatter plot of two columns of a csv file, with a smoothed trend
# line on top. The R script picks 'gam' as smoothing method when the data
# has more than 1000 rows and 'loess' otherwise.
#
# xlabel and ylabel name the columns to plot and are also forwarded, as
# strings, together with the title to plot_2dims.
def scatter_plot(csvFilePath, xlabel, ylabel, title = "scatterplot")
  point_layers = <<-EOS
    smoothing_method <- if(nrow(data) > 1000) {'gam'} else {'loess'}
    f <- ggplot(data, aes(#{xlabel}, #{ylabel})) + geom_point(shape = 1)
    f <- f + stat_smooth(method = smoothing_method)
  EOS

  x_text = xlabel.to_s
  y_text = ylabel.to_s
  plot_2dims(csvFilePath, point_layers, x_text, y_text, title)
end
#smooth_scatter_plot(csvFilePath, xlabel, ylabel, title = "smoothscatter") ⇒ Object
275 276 277 278 279 280 281 282 283 284 |
# File 'lib/feldtruby/statistics.rb', line 275
# Draw a scatter plot of two columns of a csv file with a loess-smoothed
# line (without confidence band) on top.
#
# xlabel and ylabel name the columns to plot and are also forwarded, as
# strings, together with the title to plot_2dims.
def smooth_scatter_plot(csvFilePath, xlabel, ylabel, title = "smoothscatter")
  include_library "graphics"

  smooth_layers = <<-EOS
    f <- ggplot(data, aes(#{xlabel}, #{ylabel})) + geom_point() + geom_smooth( method="loess", se = FALSE )
  EOS

  x_text = xlabel.to_s
  y_text = ylabel.to_s
  plot_2dims(csvFilePath, smooth_layers, x_text, y_text, title)
end