Taps::DataStream

Parent: Object

Attributes

db [R]

The Sequel database handle the stream reads from and writes to.

state [R]

The stream's state hash: :table_name, :offset, :chunksize, an optional :error flag, and the chunksize-training statistics.

Public Class Methods

factory(db, state)
# File lib/taps/data_stream.rb, line 202
def self.factory(db, state)
        # Coerce invalid MySQL datetimes (e.g. zero dates) to nil instead of raising
        if defined?(Sequel::MySQL) && Sequel::MySQL.respond_to?(:convert_invalid_date_time=)
                Sequel::MySQL.convert_invalid_date_time = :nil
        end

        # Rehydrate the exact class recorded by to_hash when resuming a stream
        if state.has_key?(:klass)
                return eval(state[:klass]).new(db, state)
        end

        # Tables with a single integer primary key can be streamed by key;
        # everything else pages by offset
        if Taps::Utils.single_integer_primary_key(db, state[:table_name].to_sym)
                DataStreamKeyed.new(db, state)
        else
                DataStream.new(db, state)
        end
end
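A minimal sketch of building a stream through the factory; the connection URL and the users table are hypothetical, and any Sequel-supported database works the same way:

require 'sequel'
require 'taps/data_stream'

db = Sequel.connect('sqlite://example.db')        # hypothetical database
stream = Taps::DataStream.factory(db,
        :chunksize  => 1000,
        :table_name => 'users')                   # hypothetical table
# stream is a DataStreamKeyed if users has a single integer primary key,
# otherwise a plain DataStream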
new(db, state)
# File lib/taps/data_stream.rb, line 14
def initialize(db, state)
        @db = db
        @state = {
                :offset => 0,
                :avg_chunksize => 0,
                :num_chunksize => 0,
                :total_chunksize => 0,
        }.merge(state)
        @complete = false
end
parse_json(json)
# File lib/taps/data_stream.rb, line 177
def self.parse_json(json)
        hash = JSON.parse(json).symbolize_keys
        hash[:state].symbolize_keys! if hash.has_key?(:state)
        hash
end
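parse_json reverses to_json, restoring symbol keys on the outer hash and on a nested :state hash when one is present. Continuing the factory sketch above:

hash = Taps::DataStream.parse_json(stream.to_json)
hash[:table_name] # => "users"
hash[:klass]      # => "Taps::DataStream" (or "Taps::DataStreamKeyed")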

Public Instance Methods

complete?()
# File lib/taps/data_stream.rb, line 121
def complete?
        @complete
end
encode_rows(rows)
# File lib/taps/data_stream.rb, line 103
def encode_rows(rows)
        Taps::Utils.base64encode(Marshal.dump(rows))
end
error()
# File lib/taps/data_stream.rb, line 33
def error
        state[:error] || false
end
error=(val)
# File lib/taps/data_stream.rb, line 29
def error=(val)
        state[:error] = val
end
fetch()
# File lib/taps/data_stream.rb, line 107
def fetch
        log.debug "DataStream#fetch state -> #{state.inspect}"

        t1 = Time.now
        rows = fetch_rows
        encoded_data = encode_rows(rows)
        t2 = Time.now
        elapsed_time = t2 - t1

        @complete = rows == { }

        [encoded_data, (@complete ? 0 : rows[:data].size), elapsed_time]
end
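On the sending side, fetch pairs with complete? and increment; a sketch of the push loop, where send_chunk stands in for whatever transport delivers the encoded chunk:

until stream.complete?
        encoded, row_count, elapsed = stream.fetch
        break if stream.complete?
        send_chunk(encoded)          # hypothetical transport call
        stream.increment(row_count)  # advance the offset past the shipped rows
end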
fetch_chunksize()
# File lib/taps/data_stream.rb, line 88
def fetch_chunksize
        chunksize = state[:chunksize]
        # Keep the current value while still training, before an average exists,
        # or after an error has forced the chunksize down
        return chunksize if state[:num_chunksize] < max_chunksize_training
        return chunksize if state[:avg_chunksize] == 0
        return chunksize if state[:error]
        # Once trained, never let the chunksize fall below the running average
        state[:avg_chunksize] > chunksize ? state[:avg_chunksize] : chunksize
end
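In effect, once training ends, a transiently lowered chunksize snaps back to the trained average. A worked example against the stream sketched above:

stream.state.merge!(:chunksize => 200, :avg_chunksize => 500,
        :num_chunksize => 20, :error => false)
stream.fetch_chunksize # => 500, the larger of the average and the current value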
fetch_from_resource(resource, headers)
# File lib/taps/data_stream.rb, line 160
def fetch_from_resource(resource, headers)
        res = nil
        log.debug "DataStream#fetch_from_resource state -> #{state.inspect}"
        state[:chunksize] = Taps::Utils.calculate_chunksize(state[:chunksize]) do |c|
                state[:chunksize] = c
                res = resource.post({:state => self.to_json}, headers)
        end

        begin
                params = Taps::Multipart.parse(res)
                params[:json] = self.class.parse_json(params[:json]) if params.has_key?(:json)
                return params
        rescue JSON::ParserError
                raise DataStream::CorruptedData.new("Invalid JSON Received")
        end
end
fetch_remote(resource, headers)
# File lib/taps/data_stream.rb, line 125
def fetch_remote(resource, headers)
        params = fetch_from_resource(resource, headers)
        encoded_data = params[:encoded_data]
        json = params[:json]

        rows = parse_encoded_data(encoded_data, json[:checksum])
        @complete = rows == { }

        # update local state
        state.merge!(json[:state].merge(:chunksize => state[:chunksize]))

        if @complete
                0
        else
                import_rows(rows)
                rows[:data].size
        end
end
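The receiving side has the same loop shape; here resource and headers are hypothetical stand-ins for a RestClient-style endpoint and the Taps protocol headers:

loop do
        row_count = stream.fetch_remote(resource, headers)
        break if stream.complete?
        puts "imported #{row_count} rows into #{stream.table_name}"
end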
fetch_remote_in_server(params)

This variant is used inside the server process.

# File lib/taps/data_stream.rb, line 145
def fetch_remote_in_server(params)
        json = self.class.parse_json(params[:json])
        encoded_data = params[:encoded_data]

        rows = parse_encoded_data(encoded_data, json[:checksum])
        @complete = rows == { }

        if @complete
                0
        else
                import_rows(rows)
                rows[:data].size
        end
end
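A sketch of how a server endpoint might drive this, where request_json (the raw JSON body part, carrying :state and :checksum) and request_chunk (the encoded rows) are hypothetical names for the pieces of the parsed multipart request:

json   = Taps::DataStream.parse_json(request_json)
stream = Taps::DataStream.factory(db, json[:state])
row_count = stream.fetch_remote_in_server(:json => request_json,
        :encoded_data => request_chunk)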
fetch_rows()

Keeps a record of the average chunksize within the first few hundred thousand records, after the chunksize drops below 100 or once the offset exceeds 1000.

# File lib/taps/data_stream.rb, line 74
def fetch_rows
        state[:chunksize] = fetch_chunksize
        ds = table.order(*order_by).limit(state[:chunksize], state[:offset])
        log.debug "DataStream#fetch_rows SQL -> #{ds.sql}"
        rows = Taps::Utils.format_data(ds.all,
                :string_columns => string_columns)
        update_chunksize_stats
        rows
end
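The return value is the header/data hash consumed by encode_rows and import_rows, or an empty hash once the table is exhausted. For a hypothetical two-row chunk:

rows = stream.fetch_rows
# => {:header => [:id, :name], :data => [[1, "ada"], [2, "grace"]]}
# => {} once the offset has moved past the last row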
import_rows(rows)
# File lib/taps/data_stream.rb, line 197
def import_rows(rows)
        table.import(rows[:header], rows[:data])
        state[:offset] += rows[:data].size
end
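Importing advances :offset by the number of rows written, which is what lets an interrupted transfer resume without re-importing a chunk. A sketch with a hand-built chunk:

rows = {:header => [:id, :name], :data => [[1, "ada"], [2, "grace"]]}
stream.import_rows(rows)
stream.state[:offset] # advanced by 2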
increment(row_count)
# File lib/taps/data_stream.rb, line 68
def increment(row_count)
        state[:offset] += row_count
end
log()
# File lib/taps/data_stream.rb, line 25
def log
        Taps.log
end
max_chunksize_training()
# File lib/taps/data_stream.rb, line 84
def max_chunksize_training
        20
end
order_by(name=nil)
# File lib/taps/data_stream.rb, line 61
def order_by(name=nil)
        @order_by ||= begin
                name ||= table_name
                Taps::Utils.order_by(db, name)
        end
end
parse_encoded_data(encoded_data, checksum)
# File lib/taps/data_stream.rb, line 183
def parse_encoded_data(encoded_data, checksum)
        raise DataStream::CorruptedData.new("Checksum Failed") unless Taps::Utils.valid_data?(encoded_data, checksum)

        begin
                return Marshal.load(Taps::Utils.base64decode(encoded_data))
        rescue Object => e
                unless ENV['NO_DUMP_MARSHAL_ERRORS']
                        puts "Error encountered loading data, wrote the data chunk to dump.#{Process.pid}.dat"
                        File.open("dump.#{Process.pid}.dat", "w") { |f| f.write(encoded_data) }
                end
                raise
        end
end
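encode_rows and parse_encoded_data are inverses once the checksum matches; a sketch, assuming Taps::Utils.valid_data? compares a CRC32 of the encoded payload:

require 'zlib'

rows     = {:header => [:id], :data => [[1], [2]]}
encoded  = stream.encode_rows(rows)
checksum = Zlib.crc32(encoded)
stream.parse_encoded_data(encoded, checksum) # => rows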
string_columns()
# File lib/taps/data_stream.rb, line 53
def string_columns
        @string_columns ||= Taps::Utils.incorrect_blobs(db, table_name)
end
table()
# File lib/taps/data_stream.rb, line 57
def table
        @table ||= db[table_name_sql]
end
table_name()
# File lib/taps/data_stream.rb, line 37
def table_name
        state[:table_name].to_sym
end
table_name_sql()
# File lib/taps/data_stream.rb, line 41
def table_name_sql
        table_name.identifier
end
to_hash()
# File lib/taps/data_stream.rb, line 45
def to_hash
        state.merge(:klass => self.class.to_s)
end
to_json()
# File lib/taps/data_stream.rb, line 49
def to_json
        to_hash.to_json
end
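Because to_hash records the concrete class under :klass, a stream serialized on one end can be rehydrated as the same class by factory on the other. A sketch:

state = Taps::DataStream.parse_json(stream.to_json)
copy  = Taps::DataStream.factory(db, state)
copy.class == stream.class # => true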
update_chunksize_stats()
# File lib/taps/data_stream.rb, line 96
def update_chunksize_stats
        return if state[:num_chunksize] >= max_chunksize_training
        state[:total_chunksize] += state[:chunksize]
        state[:num_chunksize] += 1
        state[:avg_chunksize] = state[:total_chunksize] / state[:num_chunksize] rescue state[:chunksize]
end
