Skip to content

Remove dump/undump functionality from Tokens #138

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 15, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 14 additions & 40 deletions bench/bench.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,10 @@
lang = ARGV.fetch(0) do
puts <<-HELP
Usage:
ruby bench.rb (c|ruby|dump) (null|text|tokens|count|statistic|yaml|html) [size in kB] [stream]
ruby bench.rb (c|ruby) (null|text|tokens|count|statistic|yaml|html) [size in kB] [stream]

SIZE defaults to 100 kB (= 100,000 bytes).
SIZE = 0 means the whole input.
SIZE is ignored when dump is input.

-p generates a profile (slow! use with SIZE = 1)
-o shows the output
Expand Down Expand Up @@ -48,10 +47,6 @@
end
end

$dump_input = lang == 'dump'
$dump_output = format == 'dump'
require 'coderay/helpers/gzip_simple.rb' if $dump_input

def here fn = nil
return MYDIR unless fn
File.join here, fn
Expand All @@ -66,59 +61,38 @@ def here fn = nil

data = nil
File.open(here("#$filename." + lang), 'rb') { |f| data = f.read }
if $dump_input
@size = CodeRay::Tokens.load(data).text.size
else
raise 'Example file is empty.' if data.empty?
unless @size.zero?
data += data until data.size >= @size
data = data[0, @size]
end
@size = data.size
raise 'Example file is empty.' if data.empty?
unless @size.zero?
data += data until data.size >= @size
data = data[0, @size]
end

@size = data.size

options = {
:tab_width => 2,
# :line_numbers => :inline,
:css => $style ? :style : :class,
}
$hl = CodeRay.encoder(format, options) unless $dump_output
$hl = CodeRay.encoder(format, options)
time = bm.report('CodeRay') do
if $stream || true
if $dump_input
raise 'Can\'t stream dump.'
elsif $dump_output
raise 'Can\'t dump stream.'
end
$o = $hl.encode(data, lang, options)
else
if $dump_input
tokens = CodeRay::Tokens.load data
else
tokens = CodeRay.scan(data, lang)
end
tokens = CodeRay.scan(data, lang)
tokens.optimize! if $optimize
if $dump_output
$o = tokens.optimize.dump
else
$o = tokens.encode($hl)
end
$o = tokens.encode($hl)
end
end
$file_created = here('test.' +
($dump_output ? 'dump' : $hl.file_extension))
$file_created = here('test.' + $hl.file_extension)
File.open($file_created, 'wb') do |f|
# f.write $o
end
Dir.chdir(here) do
FileUtils.copy 'test.dump', 'example.dump' if $dump_output
end


time_real = time.real

puts "\t%7.2f KB/s (%d.%d KB)" % [((@size / 1000.0) / time_real), @size / 1000, @size % 1000]
puts $o if ARGV.include? '-o'

end
end
puts "Files created: #$file_created"
Expand Down
1 change: 0 additions & 1 deletion lib/coderay/encoders/debug.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ module Encoders
#
# You cannot fully restore the tokens information from the
# output, because consecutive :space tokens are merged.
# Use Tokens#dump for caching purposes.
#
# See also: Scanners::Debug
class Debug < Encoder
Expand Down
41 changes: 0 additions & 41 deletions lib/coderay/helpers/gzip.rb

This file was deleted.

45 changes: 0 additions & 45 deletions lib/coderay/tokens.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
module CodeRay

# GZip library for writing and reading token dumps.
autoload :GZip, coderay_path('helpers', 'gzip')

# The Tokens class represents a list of tokens returned from
# a Scanner. It's actually just an Array with a few helper methods.
#
Expand Down Expand Up @@ -148,53 +145,11 @@ def split_into_parts *sizes
parts
end

# Serializes this object into a compressed String suitable for
# storing in a file or a database.
#
# The object is first marshalled with Marshal.dump and the result is
# then compressed with GZip.gzip (see the GZip module).
#
# +gzip_level+ is handed to GZip.gzip as the compression level. It
# defaults to 7, which the original author considers a good trade-off
# between speed and compression rate.
#
# The compressed result is extended with Undumping before it is
# returned, so callers can invoke #undump on it. See Tokens.load.
def dump(gzip_level = 7)
  marshalled = Marshal.dump(self)
  GZip.gzip(marshalled, gzip_level).extend(Undumping)
end

# Returns the number of tokens held by this list.
#
# Every token takes up two consecutive entries (see begin_group and
# end_group, which each push two values), so the token count is half
# of #size, rounded down.
def count
  size.div(2)
end

# Mixin that adds an #undump method to an object.
#
# Tokens#dump extends the string it returns with this module.
module Undumping
  # Passes the receiver to Tokens.load and returns whatever that
  # call returns.
  def undump
    Tokens.load(self)
  end
end

# Restores an object from a dump string.
#
# The dump is first decompressed with GZip.gunzip and the result is
# then deserialized with Marshal.load.
#
# The restored object is returned; it is commonly a Tokens object,
# but this is not guaranteed. It is also stored in the @dump instance
# variable of the Tokens class itself.
#
# NOTE(review): Marshal.load can instantiate arbitrary objects, so
# this should only be fed dumps from a trusted source.
def Tokens.load(dump)
  inflated = GZip.gunzip(dump)
  @dump = Marshal.load(inflated)
end

alias text_token push
# Opens a token group: appends the :begin_group marker followed by
# +kind+ and returns the result of #push.
def begin_group(kind)
  push(:begin_group, kind)
end
# Closes a token group: appends the :end_group marker followed by
# +kind+ and returns the result of #push.
def end_group(kind)
  push(:end_group, kind)
end
Expand Down
9 changes: 0 additions & 9 deletions test/unit/tokens.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,6 @@ def test_adding_tokens
assert_equal tokens.count, 4
end

# Round-trip check: dumping a token list and undumping the result
# must not raise and must yield something equal to the original.
def test_dump_undump
  original = make_tokens
  restored = nil
  assert_nothing_raised { restored = original.dump.undump }
  assert_equal original, restored
end

# The string form of the sample tokens must be exactly 'string()'.
def test_to_s
  rendered = make_tokens.to_s
  assert_equal('string()', rendered)
end
Expand Down