DZone Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world.

Lance has posted 24 posts at DZone. View Full User Profile

Generate Test Data In Rails Where Created_at Falls Along A Statistical Distribution

12.31.2010
| 3891 views |
  • submit to reddit
        
# http://stackoverflow.com/questions/3109670/generate-random-numbers-with-probabilistic-distribution
# http://rb-gsl.rubyforge.org/files/rdoc/randist_rdoc.html
# http://rb-gsl.rubyforge.org/files/rdoc/rng_rdoc.html
# http://www.gnu.org/software/gsl/manual/html_node/Random-number-generator-algorithms.html#Random-number-generator-algorithms
# http://rb-gsl.rubyforge.org/files/rdoc/randist_rdoc.html
# http://www.gnu.org/software/gsl/manual/html_node/Random-Number-Distributions.html
# http://rb-gsl.rubyforge.org/files/rdoc/hist2d_rdoc.html

# Install gsl scientific library
# brew install gsl
# gem install gsl

require 'rubygems'
require 'gsl'

# Draws random samples and counts them in a histogram whose number of bins
# is duration / interval (integer division).
#
# Example:
#   generate_time_plot :duration => 10.days, :interval => 6.hours, :count => 100, :range => [-2, 2]
#
# Options:
#   :duration     - total span to cover; converted with #to_i (required)
#   :interval     - width of one bin; converted with #to_i (required)
#   :count        - number of samples to draw (required)
#   :distribution - name of the GSL::Rng method used to draw a sample (default :gaussian)
#   :range        - [min, max] handed to GSL::Histogram.alloc (default [-5, 5]);
#                   the unit is that of the sampled values, not of time
#   :sigma        - single argument passed to the sampling method (default 1.0)
#   :seed         - seed passed to GSL::Rng.alloc (default 1)
#
# Returns an Array holding histogram.get(i) for every bin index.
# Raises ArgumentError when :count, :duration or :interval is missing.
def generate_time_plot(options = {})
  # Without this check a missing :count turns `0...count` into an endless
  # range on current Rubies and the sampling loop never terminates.
  [:count, :duration, :interval].each do |key|
    raise ArgumentError, "missing required option :#{key}" if options[key].nil?
  end

  distribution = options[:distribution] || :gaussian
  sigma        = options[:sigma] || 1.0
  seed         = options[:seed] || 1
  min_max      = options[:range] || [-5, 5]
  bins         = options[:duration].to_i / options[:interval].to_i

  rng       = GSL::Rng.alloc(GSL::Rng::TAUS, seed)
  histogram = GSL::Histogram.alloc(bins, min_max)

  options[:count].times do
    histogram.increment(rng.send(distribution, sigma))
  end

  (0...bins).map { |bin| histogram.get(bin) }
end

# Demo run: 10 days split into 6-hour bins, 100 samples.
# NOTE(review): Integer#days / Integer#hours are not defined in this file;
# presumably they come from ActiveSupport in the Rails environment — confirm.
demo_bins = generate_time_plot(:duration => 10.days, :interval => 6.hours, :count => 100, :range => [-5, 5])
puts demo_bins.inspect
# => [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 6.0, 7.0, 9.0, 8.0, 12.0, 7.0, 15.0, 11.0, 4.0, 5.0, 3.0, 4.0, 1.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]

# Walks the bins returned by generate_time_plot(options) and yields one hash
# per bin: the caller's options merged with
#   :x     - options[:start] + options[:interval] * bin index
#   :y     - the value stored for that bin
#   :index - the bin index
# A block is required. Returns whatever each_with_index returns (the bin array).
def plot(options = {})
  generate_time_plot(options).each_with_index do |bin_value, position|
    bin_start = options[:start] + (options[:interval] * position)
    point = { :x => bin_start, :y => bin_value, :index => position }
    yield options.merge(point)
  end
end

# Clock-shifting helpers for Time, adapted from https://github.com/harvesthq/time-warp.
# They are defined BEFORE the data-generation script below, because that
# script calls Time.is_within while it runs.
unless Time.respond_to?(:real_now) # assures there is no infinite looping when aliasing #now
  Time.class_eval do
    class << self
      # Seconds subtracted from the real clock by .now (nil is treated as 0).
      attr_accessor :testing_offset

      alias_method :real_now, :now
      alias_method :real_new, :new

      # The real current time moved back by testing_offset.
      def now
        real_now - (testing_offset || 0)
      end

      # With no arguments this follows the shifted clock, like .now;
      # with arguments it builds the requested time as the unpatched
      # Time.new would.
      def new(*args)
        args.empty? ? now : real_new(*args)
      end
      # Forward keyword arguments (e.g. in:) untouched where Ruby supports them.
      ruby2_keywords :new if respond_to?(:ruby2_keywords, true)

      # Runs the block with Time.now shifted so that it reports +time+
      # (plus however long the block has been running). The offset that was
      # active before the call is restored afterwards, so calls can nest.
      def is(time, &block)
        previous_offset = Time.testing_offset || 0
        Time.testing_offset = Time.real_now - time
        yield
      ensure
        Time.testing_offset = previous_offset
      end

      # Like .is, but the pretend time is start_time plus a random amount
      # obtained from rand(end_time.to_i - start_time.to_i).
      def is_within(start_time, end_time, &block)
        is(start_time + rand(end_time.to_i - start_time.to_i), &block)
      end
    end
  end
end

# For every non-empty bin, run the inner block once per counted sample with
# the clock moved to a random moment inside that bin's interval.
plot(:start => 1.month.ago.to_time, :duration => 10.days, :interval => 6.hours, :count => 100, :range => [-5, 5]) do |column|
  next if column[:y].zero?

  column[:y].to_i.times do
    Time.is_within(column[:x], column[:x] + column[:interval]) do
      puts Time.now.to_s
      # generate test data here... User.create!
    end
  end
end