diff --git a/.github/workflows/assets.yml b/.github/workflows/assets.yml index ae572e5e..39c128e6 100644 --- a/.github/workflows/assets.yml +++ b/.github/workflows/assets.yml @@ -4,6 +4,7 @@ on: push: tags: - 'v*' + workflow_dispatch: jobs: assets: @@ -12,18 +13,28 @@ jobs: BUNDLE_WITHOUT: "secryst:jsexec" SKIP_JS: "1" steps: - - name: Checkout repository and submodules + - name: Checkout repository uses: actions/checkout@v2 with: - submodules: true + repository: interscript/interscript + + - name: Run bootstrap script + run: ruby bootstrap.rb + - name: Use Ruby uses: ruby/setup-ruby@v1 with: - ruby-version: 3.0 + ruby-version: "3.0" bundler-cache: true - working-directory: ./ruby + + - name: Install bundle + working-directory: ./ruby + run: bundle install --jobs 4 --retry 3 --with jsexec --without secryst + - name: Generate visualization json - run: pushd ruby; bundle exec rake generate_visualization_json; popd + working-directory: ./ruby + run: bundle exec rake generate_visualization_json + - name: Archive json files from the previous step uses: thedoctor0/zip-release@master with: @@ -32,6 +43,7 @@ jobs: directory: ./ruby/ exclusions: '*.git*' type: zip + - name: Upload artifacts id: upload_vis_json uses: svenstaro/upload-release-action@2.2.1 @@ -41,8 +53,11 @@ jobs: file_glob: true tag: ${{ github.ref }} overwrite: true + - name: Generate metadata - run: pushd ruby; bundle exec rake generate_metadata_json; popd + working-directory: ./ruby + run: bundle exec rake generate_metadata_json + - name: Archive metadata from the previous step uses: thedoctor0/zip-release@master with: @@ -51,6 +66,7 @@ jobs: directory: ./ruby/ exclusions: '*.git*' type: zip + - name: Upload metadata id: upload_metadata uses: svenstaro/upload-release-action@2.2.1 @@ -60,11 +76,13 @@ jobs: asset_name: metadata.json.zip tag: ${{ github.ref }} overwrite: true + - name: Output link run: | echo ${{ steps.upload_vis_json.outputs.browser_download_url }} echo ${{ 
steps.upload_metadata.outputs.browser_download_url }} -# - name: Trigger interscript.org + +# - name: Trigger deploy at interscript.org # uses: peter-evans/repository-dispatch@v1 # with: # token: ${{ secrets.INTERSCRIPT_CI_TOKEN }} diff --git a/.github/workflows/rake.yml b/.github/workflows/rake.yml index 9ab9f3ab..6a789f3e 100644 --- a/.github/workflows/rake.yml +++ b/.github/workflows/rake.yml @@ -2,54 +2,42 @@ name: rake on: push: - branches: [ master, main ] + branches: [ main, v*, ci-check ] tags: [ v* ] pull_request: -defaults: - run: - working-directory: ./ruby - jobs: rspec: name: Test on Ruby ${{ matrix.ruby }} ${{ matrix.os }} runs-on: ${{ matrix.os }} - continue-on-error: ${{ matrix.experimental }} + continue-on-error: true strategy: fail-fast: false matrix: - ruby: [ 2.7, 2.6, 2.5 ] + ruby: [ 3.3, 3.2, 3.1, "3.0", 2.7, 2.6 ] os: [ ubuntu-latest, windows-latest, macos-latest ] - experimental: [ false ] - include: - - ruby: 3.0 - os: 'ubuntu-latest' - experimental: true - - ruby: 3.0 - os: 'windows-latest' - experimental: true - - ruby: 3.0 - os: 'macos-latest' - experimental: true - env: BUNDLE_WITHOUT: "secryst" SKIP_JS: "1" steps: - - name: Checkout repository and submodules + - name: Checkout repository uses: actions/checkout@v2 with: - submodules: true + repository: interscript/interscript + + - name: Run bootstrap script + run: ruby bootstrap.rb - name: Use Ruby uses: ruby/setup-ruby@v1 with: ruby-version: ${{ matrix.ruby }} bundler-cache: true - working-directory: ./ruby - name: Run RSpecs + working-directory: ./ruby run: | + pip install regex bundle install --with=jsexec - bundle exec rspec -f f + bundle exec rspec diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f8d92e20..d8de6a2e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -9,14 +9,17 @@ jobs: release: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - name: Checkout repository + uses: actions/checkout@v2 with: - 
submodules: true + repository: interscript/interscript - - uses: actions/setup-ruby@v1 + - name: Run bootstrap script + run: ruby bootstrap.rb + + - uses: ruby/setup-ruby@v1 with: - ruby-version: '2.7' - architecture: 'x64' + ruby-version: '3.0' - uses: actions/setup-node@v1 with: @@ -24,45 +27,34 @@ jobs: # For now let's install without secryst, as we don't necessarily need it. # We may need to change it once we start to depend on secryst maps. - - run: pushd ruby && bundle install --jobs 4 --retry 3 --with jsexec --without secryst && popd + - name: Install bundle + working-directory: ./ruby + run: bundle install --jobs 4 --retry 3 --with jsexec --without secryst - - name: Test the Ruby package - run: pushd ruby && bundle exec rake && popd + - name: Test Ruby package + working-directory: ./ruby + run: bundle exec rake - - name: Test the JS package - run: pushd js && npm install && npm run prepareMaps && npm test && popd + - name: Test JS package + working-directory: ./js + run: npm install && npm run prepareMaps && npm test - name: Publish to rubygems.org env: RUBYGEMS_API_KEY: ${{secrets.INTERSCRIPT_RUBYGEMS_API_KEY}} run: | gem install gem-release + mkdir -p ~/.gem touch ~/.gem/credentials cat > ~/.gem/credentials << EOF --- :rubygems_api_key: ${RUBYGEMS_API_KEY} EOF chmod 0600 ~/.gem/credentials - pushd js - git status - popd - pushd maps - git status - gem release - popd pushd ruby git status gem release popd - - name: Publish to npmjs.org - env: - NPMJS_TOKEN: ${{secrets.INTERSCRIPT_NPM_TOKEN}} - run: | - pushd js - npm config set //registry.npmjs.org/:_authToken=$NPMJS_TOKEN - npm run prepareMaps - npm publish - popd # Let's keep it commented out for now. Please uncomment it once you are ready with # interscript-api to support Interscript v2. 
diff --git a/.gitignore b/.gitignore index 8c65f879..6f2070e6 100644 --- a/.gitignore +++ b/.gitignore @@ -4,12 +4,12 @@ /coverage/ /InstalledFiles /pkg/ -/ruby/spec/reports/ -/ruby/spec/examples.txt -/ruby/test/tmp/ -/ruby/test/version_tmp/ -/ruby/tmp/ -/ruby/Gemfile.lock +/spec/reports/ +/spec/examples.txt +/test/tmp/ +/test/version_tmp/ +/tmp/ +/Gemfile.lock # Used by dotenv library to load environment variables. # .env @@ -20,10 +20,10 @@ ## Specific to RubyMotion: .dat* .repl_history -/ruby/build/ +/build/ *.bridgesupport -/ruby/build-iPhoneOS/ -/ruby/build-iPhoneSimulator/ +/build-iPhoneOS/ +/build-iPhoneSimulator/ ## Specific to RubyMotion (use of CocoaPods): # @@ -34,15 +34,15 @@ # vendor/Pods/ ## Documentation cache and generated files: -/ruby/.yardoc/ -/ruby/_yardoc/ -/ruby/doc/ -/ruby/rdoc/ +/.yardoc/ +/_yardoc/ +/doc/ +/rdoc/ ## Environment normalization: -/ruby/.bundle/ -/ruby/vendor/bundle -/ruby/lib/bundler/man/ +/.bundle/ +/vendor/bundle +/lib/bundler/man/ # for a library or gem, you might want to ignore these files since the code is # intended to run in multiple environments; otherwise, check them in: @@ -51,16 +51,29 @@ # .ruby-gemset # unless supporting rvm < 1.11.0 or doing something fancy, ignore this: -/ruby/.rvmrc +/.rvmrc # Don't bundle generated files -/ruby/visualizations -/ruby/json -/ruby/vis_json -/ruby/metadata.json -/ruby/compiled +/visualizations +/json +/vis_json +/auth_json +/metadata.json +/compiled # Used by RuboCop. Remote config files pulled in from inherit_from directive. 
# .rubocop-https?--* -/js/src/maps -/js/node_modules + +#misic +.idea +.bundle/.bundle/ +/.yardoc +/_yardoc/ +/coverage/ +/doc/ +/pkg/ +/spec/reports/ +/tmp/ + +# rspec failure tracking +.rspec_status diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 5e037dd3..00000000 --- a/.gitmodules +++ /dev/null @@ -1,6 +0,0 @@ -[submodule "maps"] - path = maps - url = https://github.com/interscript/maps.git -[submodule "js"] - path = js - url = https://github.com/interscript/interscript-js.git diff --git a/ruby/.rspec b/.rspec similarity index 100% rename from ruby/.rspec rename to .rspec diff --git a/ruby/Gemfile b/Gemfile similarity index 75% rename from ruby/Gemfile rename to Gemfile index 6301f549..cf4f70d5 100644 --- a/ruby/Gemfile +++ b/Gemfile @@ -26,4 +26,19 @@ unless ENV["SKIP_JS"] end end +unless ENV["SKIP_PYTHON"] + group :pyexec do + gem 'pycall' + end +end + +group :rababa do + gem 'rababa', "~> 0.1.1" +end + +gem 'pry' + +gem 'iso-639-data' +gem 'iso-15924' + gem 'simplecov', require: false, group: :test diff --git a/README.adoc b/README.adoc deleted file mode 100644 index 59165bb0..00000000 --- a/README.adoc +++ /dev/null @@ -1,176 +0,0 @@ -= Interscript: Interoperable Script Conversion Systems, with Ruby and JavaScript runtimes - -image:https://github.com/interscript/interscript/workflows/test/badge.svg["Ruby build status", link="https://github.com/interscript/interscript/actions?workflow=test"] -image:https://github.com/interscript/interscript/workflows/js/badge.svg["JavaScript build status", link="https://github.com/interscript/interscript/actions?workflow=js"] - -== Introduction - -This repository contains interoperable transliteration schemes from: - -* ALA-LC -* BGN/PCGN -* ICAO -* ISO -* UN (by UNGEGN) -* Many, many other script conversion system authorities. - -The goal is to achieve interoperable transliteration schemes allowing quality comparisons. 
- - - -== Demonstration - -These transliteration systems are used in the demo: - -`bgnpcgn-rus-Cyrl-Latn-1947`:: BGN/PCGN Romanization of Russian -`iso-rus-Cyrl-Latn-9-1995`:: ISO 9 Romanization of Russian -`icao-rus-Cyrl-Latn-9303`:: ICAO MRZ Romanization of Russian -`bas-rus-Cyrl-Latn-2017-bss`:: Bulgaria Academy of Science Streamlined System for Russian - -image:docs/demo/20191118-interscript-demo-cast.gif["interscript screencast"] - - -== Installation - -=== Prerequisites - -Interscript depends on Ruby. Once you manage to install Ruby, it's easy. This part -won't work until we release Interscript v2, please use the one below. - -[source,sh] ----- -gem install interscript -v "~>2.0" ----- - -You can also download a local copy of this Git repository, eg. for development -purposes: - -[source,sh] ----- -git clone https://github.com/interscript/lcs -cd lcs/ruby -bundle install ----- - -==== Additional prerequisites for Thai systems - -If you want to transliterate Thai systems, you will need to install some additional -requirements. Please consult: link:docs/Usage_with_Secryst.adoc[Usage with Secryst]. - -== Usage - -Assume you have a file ready in the source script like this: - -[source,sh] ----- -cat < rus-Cyrl.txt -Эх, тройка! птица тройка, кто тебя выдумал? знать, у бойкого народа ты -могла только родиться, в той земле, что не любит шутить, а -ровнем-гладнем разметнулась на полсвета, да и ступай считать версты, -пока не зарябит тебе в очи. И не хитрый, кажись, дорожный снаряд, не -железным схвачен винтом, а наскоро живьём с одним топором да долотом -снарядил и собрал тебя ярославский расторопный мужик. Не в немецких -ботфортах ямщик: борода да рукавицы, и сидит чёрт знает на чём; а -привстал, да замахнулся, да затянул песню — кони вихрем, спицы в -колесах смешались в один гладкий круг, только дрогнула дорога, да -вскрикнул в испуге остановившийся пешеход — и вон она понеслась, -понеслась, понеслась! - -Н.В. 
Гоголь -EOT ----- - -You can run `interscript` on this text using different transliteration systems. - -[source,sh] ----- -interscript rus-Cyrl.txt \ - --system=bgnpcgn-rus-Cyrl-Latn-1947 \ - --output=bgnpcgn-rus-Latn.txt - -interscript rus-Cyrl.txt \ - --system=iso-rus-Cyrl-Latn-9-1995 \ - --output=iso-rus-Latn.txt - -interscript rus-Cyrl.txt \ - --system=icao-rus-Cyrl-Latn-9303 \ - --output=icao-rus-Latn.txt - -interscript rus-Cyrl.txt \ - --system=bas-rus-Cyrl-Latn-2017-bss \ - --output=bas-rus-Latn.txt ----- - -It is then easy to see the exact differences in rendering between the systems. - -[source,sh] ----- -diff bgnpcgn-rus-Latn.txt bas-rus-Latn.txt ----- - -If you use Interscript from the Git repository, you would call the following command -instead of `interscript`: - -[source,sh] ----- -# Ensure you are in your Git repository root path -ruby/bin/interscript rus-Cyrl.txt \ - --system=bas-rus-Cyrl-Latn-2017-bss \ - --output=bas-rus-Latn.txt ----- - -== Adding transliteration system - -Please consult link:docs/Map_Editing_Guide.adoc[the Map Editing Guide] - -== Integration with Ruby applications - -Please consult link:docs/Integration_with_Ruby_Applications.adoc[the guide for integration with Ruby applications] - -== ISCS system codes - -In accordance with -http://calconnect.gitlab.io/tc-localization/csd-transcription-systems[ISO/CC 24229], -the system code identifying a script conversion system has the following components: - -e.g. `bgnpcgn-rus-Cyrl-Latn-1947`: - -`bgnpcgn`:: the authority identifier -`rus`:: an ISO 639-{1,2,3,5} language code that this system applies to (For 639-2, use (T) code) -`Cyrl`:: an ISO 15924 script code, identifying the source script -`Latn`:: an ISO 15924 script code, identifying the target script -`1947`:: an identifier unit within the authority to identify this system - - -== Covered languages - -Currently the schemes cover Cyrillic, Armenian, Greek, Arabic and Hebrew. 
- - -== Samples to play with - -* `rus-Cyrl-1.txt`: Copied from the XLS output from http://www.primorsk.vybory.izbirkom.ru/region/primorsk?action=show&global=true&root=254017025&tvd=4254017212287&vrn=100100067795849&prver=0&pronetvd=0®ion=25&sub_region=25&type=242&vibid=4254017212287 - -* `rus-Cyrl-2.txt`: Copied from the XLS output from http://www.yaroslavl.vybory.izbirkom.ru/region/yaroslavl?action=show&root=764013001&tvd=4764013188704&vrn=4764013188693&prver=0&pronetvd=0®ion=76&sub_region=76&type=426&vibid=4764013188704 - - -== References - -Reference documents are located at the -https://github.com/interscript/interscript-references[interscript-references repository]. -Some specifications that have distribution limitations may not be reproduced there. - - -== Links to system definitions - -* https://www.iso.org/committee/48750.html[ISO/TC 46 (see standards published by WG 3)] -* http://geonames.nga.mil/gns/html/romanization.html[BGN/PCGN and BGN Romanization systems (BGN)] -* https://www.gov.uk/government/publications/romanization-systems[BGN/PCGN Romanization systems (PCGN)] -* https://www.loc.gov/catdir/cpso/roman.html[ALA-LC Romanization systems in current use] -* http://catdir.loc.gov/catdir/cpso/roman.html[ALA-LC Romanization systems from 1997] -* http://www.eki.ee/wgrs/[UN Romanization systems] -* http://www.eki.ee/knab/kblatyl2.htm[EKI KNAB systems] - -== Copyright and license - -This is a Ribose project. Copyright Ribose. 
diff --git a/ruby/README.md b/README.md similarity index 100% rename from ruby/README.md rename to README.md diff --git a/ruby/Rakefile b/Rakefile similarity index 62% rename from ruby/Rakefile rename to Rakefile index d8b68da7..e5ed34b3 100644 --- a/ruby/Rakefile +++ b/Rakefile @@ -14,13 +14,19 @@ task :compile, [:compiler, :target] do |t, args| when "javascript" require "interscript/compiler/javascript" [Interscript::Compiler::Javascript, "js"] + when "python" + require "interscript/compiler/python" + [Interscript::Compiler::Python, "py"] end FileUtils.mkdir_p(args[:target]) maplist = {} - Interscript.maps.each do |map| + maps = Interscript.maps + maps = Interscript.exclude_maps(maps, compiler: compiler, platform: false) + + maps.each do |map| code = compiler.(map).code File.write(args[:target] + "/" + map + "." + ext, code) maplist[map] = nil @@ -31,23 +37,7 @@ task :compile, [:compiler, :target] do |t, args| File.write(args[:target] + "/" + map + "." + ext, code) end - File.write(args[:target] + "/index.json", maplist.to_json) -end - -task :version, [:ver] do |t, ver| - ver = ver[:ver] - - rubyver = File.read(rubyfile = __dir__+"/lib/interscript/version.rb") - jsver = File.read(jsfile = __dir__+"/../js/package.json") - mapsver = File.read(mapsfile = __dir__+"/../maps/interscript-maps.gemspec") - - rubyver = rubyver.gsub(/(VERSION = ")([0-9a-z.-]*)(")/, "\\1#{ver}\\3") - jsver = jsver.gsub(/("version": ")([0-9a-z.-]*)(")/, "\\1#{ver}\\3") - mapsver = mapsver.gsub(/(INTERSCRIPT_MAPS_VERSION=")([0-9a-z.-]*)(")/, "\\1#{ver}\\3") - - File.write(rubyfile, rubyver) - File.write(jsfile, jsver) - File.write(mapsfile, mapsver) + File.write(args[:target] + "/index.json", maplist.to_json) if args[:compiler] == "javascript" end task :generate_visualization_html do @@ -68,6 +58,7 @@ task :generate_metadata_json do require "fileutils" require "json" require "interscript" + require "interscript/compiler/javascript" FileUtils.rm_rf(file = __dir__+"/metadata.json") @@ -75,6 
+66,10 @@ task :generate_metadata_json do parsed_map = Interscript.parse(map) md = parsed_map.metadata.to_hash md["test"] = parsed_map.tests&.data&.first + md["skip_js"] = Interscript.exclude_maps([map], + compiler: Interscript::Compiler::Javascript, + platform: false, + ).empty? [map, md] end.to_h @@ -113,4 +108,35 @@ task :generate_visualization_json do end end +task :generate_authority_json do + require "interscript" + require "json" + require "iso-639-data" + + FileUtils.rm_rf(dir = __dir__+"/auth_json/") + FileUtils.mkdir_p(dir) + + %w[iso icao din].each do |auth| + out = Interscript.maps.select do |map_name| + map_name.start_with? "#{auth}-" + end.sort.map do |map_name| + map = Interscript.parse(map_name) + tests = map.tests&.data&.first(2)&.transpose || [] + std, lang = map.metadata.data[:language].split(':') + + { + lang: std.end_with?("-3") ? Iso639Data.iso_639_3[lang]['Ref_Name'] : Iso639Data.iso_639_2[lang]['eng'], + isoName: map.metadata.data[:name], + systemName: map_name, + samples: tests[0] || [], + english: [], + result: [] + } + end + + json = JSON.pretty_generate(out) + File.write(dir+auth+".json", json) + end +end + task :default => :spec diff --git a/bin/console b/bin/console new file mode 100755 index 00000000..20b3d454 --- /dev/null +++ b/bin/console @@ -0,0 +1,10 @@ +#!/usr/bin/env ruby + +require "bundler/setup" +require "interscript" + +require "interscript/utils/helpers" +include Interscript::Utils::Helpers + +require "pry" +Pry.start diff --git a/ruby/bin/interscript b/bin/interscript similarity index 100% rename from ruby/bin/interscript rename to bin/interscript diff --git a/ruby/bin/maps_analyze_staging b/bin/maps_analyze_staging similarity index 100% rename from ruby/bin/maps_analyze_staging rename to bin/maps_analyze_staging diff --git a/ruby/bin/maps_debug_compilers b/bin/maps_debug_compilers similarity index 100% rename from ruby/bin/maps_debug_compilers rename to bin/maps_debug_compilers diff --git a/ruby/bin/maps_debug_ordering 
b/bin/maps_debug_ordering similarity index 100% rename from ruby/bin/maps_debug_ordering rename to bin/maps_debug_ordering diff --git a/ruby/bin/maps_debug_ruby_compile b/bin/maps_debug_ruby_compile similarity index 100% rename from ruby/bin/maps_debug_ruby_compile rename to bin/maps_debug_ruby_compile diff --git a/ruby/bin/maps_debug_step_by_step b/bin/maps_debug_step_by_step similarity index 100% rename from ruby/bin/maps_debug_step_by_step rename to bin/maps_debug_step_by_step diff --git a/ruby/bin/maps_optimize_order b/bin/maps_optimize_order similarity index 100% rename from ruby/bin/maps_optimize_order rename to bin/maps_optimize_order diff --git a/ruby/bin/maps_v1_analyze_regexps b/bin/maps_v1_analyze_regexps similarity index 100% rename from ruby/bin/maps_v1_analyze_regexps rename to bin/maps_v1_analyze_regexps diff --git a/ruby/bin/maps_v1_to_v2 b/bin/maps_v1_to_v2 similarity index 100% rename from ruby/bin/maps_v1_to_v2 rename to bin/maps_v1_to_v2 diff --git a/bin/set_version b/bin/set_version new file mode 100755 index 00000000..aa99c3d5 --- /dev/null +++ b/bin/set_version @@ -0,0 +1,16 @@ +#!/usr/bin/env ruby +ver = ARGV[0] +part = ARGV[1] + +rubyver = File.read(rubyfile = __dir__+"/../lib/interscript/version.rb") +jsver = File.read(jsfile = __dir__+"/../../js/package.json") +mapsver = File.read(mapsfile = __dir__+"/../../maps/interscript-maps.gemspec") + +rubyver = rubyver.gsub(/(VERSION = ")([0-9a-z.-]*)(")/, "\\1#{ver}\\3") +jsver = jsver.gsub(/("version": ")([0-9a-z.-]*)(")/, "\\1#{ver}\\3") +mapsver = mapsver.gsub(/(INTERSCRIPT_MAPS_VERSION=")([0-9a-z.-]*)(")/, "\\1#{ver}\\3") + +File.write(rubyfile, rubyver) if %w[all ruby].include? part +File.write(jsfile, jsver) if %w[all js].include? part +File.write(mapsfile, mapsver) if %w[all maps].include? 
part + diff --git a/ruby/bin/setup b/bin/setup similarity index 100% rename from ruby/bin/setup rename to bin/setup diff --git a/docs/Integration_with_Ruby_Applications.adoc b/docs/Integration_with_Ruby_Applications.adoc deleted file mode 100644 index 40d3c49e..00000000 --- a/docs/Integration_with_Ruby_Applications.adoc +++ /dev/null @@ -1,72 +0,0 @@ -= Integration with Ruby Applications - -Interscript can be used as a Ruby Gem library to be integrated with other Ruby -applications. - -== Gemfile - -You need to make sure your Gemfile contains the following lines: - -[source,ruby] ----- -source "https://rubygems.org" - -gem "interscript", "~>2.0" ----- - -== Requiring - -In your codebase, if you don't do `Bundler.require`, you will need to add the -following line: - -[source,ruby] ----- -require "interscript" ----- - -== Listing all available maps - -To list all available maps, one must execute the following code: - -[source,ruby] ----- -maps = Interscript.maps ----- - -`maps` will be an array containing all Interscript maps by their name. - -== Transliterating text - -To transliterate test using a given map, like `bas-rus-Cyrl-Latn-2017-bss`, -one must execute: - -[source,ruby] ----- -cache = {} -input = "Хелло" -output = Interscript.transliterate("bas-rus-Cyrl-Latn-2017-bss", - input, - cache) ----- - -You should preserve the `cache` variable for performance reasons. It is optional, -you don't need to (but should) supply it. - -=== Using Ruby compiler - -If performance is of utmost performance for your application and you want to -sacrifice a little bit of loading time for much better performance, you can use -`Interscript::Compiler::Ruby` instead of `Interscript::Interpreter` (which is -used by default). 
- -[source,ruby] ----- -require "interscript/compiler/ruby" - -cache = {} -input = "Хелло" -output = Interscript.transliterate("bas-rus-Cyrl-Latn-2017-bss", - input, - cache, - compiler: Interscript::Compiler::Ruby) ----- diff --git a/docs/Interscript_Map_Format.adoc b/docs/Interscript_Map_Format.adoc deleted file mode 100644 index dc2ec9ab..00000000 --- a/docs/Interscript_Map_Format.adoc +++ /dev/null @@ -1,425 +0,0 @@ -= Interscript Map format syntax - -This document describes the DSL-based files with an extension `.iml` or `.imp`. - -An `.imp` file is a file containing a standalone transliteration map. For -instance, a map that can transliterate a Korean file to a Latin file. - -An `.iml` file is a file that contains a library of aliases and stages to be -used by the `.imp` maps. It follows the same format, but does not require the -metadata and tests parts to exist and doesn't allow the `main` stage to exist. -This document describes the map version of the format if it isn't noted -otherwise. - -== Basic syntax - -A `\#` character is a comment character. This means, that the part that follows -a `#` character till the end of the line is ignored by Interscript, but exist to -communicate to a human reader the intention behind the content. In this document -it is most often a hint to a person reading this document. - -A String is a part of the document of a form either: `"content"` or `'content'`. -It denotes a group of characters to be used. It can be joined together using a -`+` character like so: `"a" + "b"` which is equal to as if someone wrote just -`"ab"`. - -Except for the strings of the form `'content'`, all those forms can contain -escape forms like `\u0410`, which means "An Unicode character 0410". The usage -of those forms is discouraged in new maps, but possible. - -An array (or a list) is a part of the document of a form `["a", "b", "c"]`. It -means a sequential group of Strings, or other types. 
- -== Document - -The root part of the `.iml` file is called a document. A map has a format as -follows: - -[source,ruby] ----- -metadata { - # Metadata part comes here -} - -tests { - # Tests part comes here -} - -# A dependency directive may happen zero or more times. It will be described in -# a subsection. -dependency "other-map-or-library", as: shortname - -# This part is optional -aliases { - # Aliases part comes here -} - -stage { - # A stage description comes here -} - -# There may be more than 1 stage, the other stages need to have a name. The -# default stage name is `main`. A name can't happen more than once in a document. - -stage(stage_name) { - # A stage description comes here -} ----- - -=== Dependency - -Dependency is an instruction to be issued only in the document context. It means -that we want to import some aliases or stages from another map or a library. - -[source,ruby] ----- -dependency "other-map-or-library", as: shortname ----- - -This instruction will allow us to reference aliases and stages from other -libraries in this form: `map.shortname.stage.stagename` for stages and -`map.shortname.aliasname` for aliases. - -There is a second syntax, mostly useful for loading libraries that will import -the stages and aliases to a global context resulting in possibly more human -readable maps: - -[source,ruby] ----- -dependency "other-map-or-library", as: shortname, import: true ----- - -This form allows to reference other stages and aliases in the following form: -`stage.stagename`, `aliasname` - -It is not possible to load maps using this form, only libraries, because we -can't override the `main` stage. - -The standard library is implicitly imported this way. There's no way or need to -import it explicitly. - -==== Standard library - -All maps depend on a standard library implicitly. This standard library defines -a few useful aliases that may or may not be expressed otherwise. 
- -Below is a table that describes the aliases defined by the standard library: - -|=== -| `none` | An empty string -| `space` | A space character -| `whitespace` | Any whitespace ascii character (space, tab, line-delimiter, ...) -| `boundary` | A word boundary (see below for what institutes a word character) -| `word` | An ascii word character (a-z, A-Z, 0-9, _) -| `not_word` | Negation of the above -| `alpha` | Any ascii alphabetic character (a-z, A-Z) -| `not_alpha` | Negation of the above -| `digit` | Any ascii digit -| `not_digit` | Negation of the above -| `line_start` | Beginning of a line -| `line_end` | Ending of a line -| `string_start` | Beginning of a string -| `string_end` | Ending of a string -|=== - -Any standard library (or otherwise) aliases can be joined with anything else -using a + command, for example: `line_start + "rest"`. - -== Metadata part - -The metadata part describes our map. It follows a YAML syntax. - -[source,ruby] ----- -metadata { - # ID of the authority that provided the transliteration rules we are about to implement - authority_id: iso - # ID of the rules, most often the year they were defined - id: 1996-method1 - # The language code of the map - language: iso-639-2:kor - # The source script of our map, in our example Hang for Hangul - source_script: Hang - # The destination script of our map - destination_script: Latn - # The longer name of our map - name: ISO/TR 11941:1996 Information and documentation — Transliteration of Korean script into Latin characters - # The URL where it was published - url: https://www.iso.org/standard/20564.html - # The creation date of our map - creation_date: 1996 - # The adoption date of our map, or empty if not adopted - adoption_date: "" - # The description of our map - description: | - Establishes a system for the transliteration of the characters of Korean script into Latin characters. - Intended to provide a means for international communication of written documents. 
- - # The notes that describe some parts of our map that we are about to implement - notes: - - A word-initial hard sign 'ъ' is not represented, but instead is left out of the transliteration. - - The romanization follows the dialect spoken in Chechnya rather than other local pronunciations. -} ----- - -== Tests part - -The tests part describes a group of the tests to be executed by the automated -system to verify that the map is defined properly. An example tests part looks -like this: - -[source,ruby] ----- -tests { - test "애기", "aeki" - test "방", "pang" -} ----- - -This means, that we want to test our map to transliterate a string "애기" to -"aeki" and "방" to "pang". - -== Aliases part - -An aliases part describes a group of aliases to be used by the stages to -simplify the code of our map. - -Let's suppose that our map refers to "Double consonant jamo" and "Aspirated -consonant jamo" quite extensively. We can alias those - -[source,ruby] ----- -aliases { - def_alias double_cons_jamo, any("ᄁᄄᄈᄍᄊ") - def_alias aspirated_cons_jamo, any("ᄏᄐᄑᄎ") -} ----- - -And later in the stage part refer to them by just `double_cons_jamo`, not -needing to repeat ourselves. - -== Stage part - -A stage part describes a stage, a sequential group of steps to transliterate -a string from a source script code to a destination script code. An example -stage looks like the following: - -[source,ruby] ----- -stage { - run map.hangjamo.stage.main - sub any("ᄀᆨ"), "k" - sub any("ᄏᆿ"), "kh" - parallel { - sub "ᅡ", "a" - sub "ᅥ", "eo" - } -} ----- - -A stage can be named, as described in the Document section. The default name -of a stage is `main`. - -=== `sub` call - -A `sub` call does a substitution of an item (string, character, alias) with -another item. 
- -[source,ruby] ----- -stage { - sub "source", "destination" -} ----- - -This call allows for some named parameters: - -[cols="2"] -|=== -| `before:` -| Execute this substitution only if the "source" is preceded by what is given - as a parameter, but won't replace it, it will only replace the "source". - -| `after:` -| Same, but this parameter denotes what is used after. - -| `not_before:`, `not_after:` -| Negation of `before:` and `after:`. The substitution will only happen if a - parameter is NOT present before or after the "source". -|=== - -For example: - -[source,ruby] ----- -stage { - sub boundary + "Е", "Ye", not_before: "’" - sub boundary + "е", "ye", not_before: "’" - - sub none, "'", not_before: hangul, after: aspirated_cons -} ----- - -==== Multiple replacements - -In various maps there was a need to document multiple replacements. Let's suppose -our character set has a character "a" that can be transliterated to any of the -forms "X", "Y" or "Z". As of now, it means that "a" is always translated to "X", -as it came first. In the future it will be possible to execute such a map in -reverse as well. - -[source,ruby] ----- -stage { - sub "a", any("XYZ") -} ----- - -=== `parallel` block - -A parallel block can be defined as a subsection of a `stage` part. It indicates -that the steps inside need to be executed in parallel. At the current time, only -`sub` calls can be executed in parallel. It also means, that those steps will try -to find the longest substrings first. - -[source,ruby] ----- -stage { - parallel { - sub "А", 'A' - sub "Б", 'B' - sub "В", 'V' - sub "Г", 'G' - } -} ----- - -==== Simple mode - -If there are only rules with simple sub rules, we are using a fast track -implementation. By simple sub rules we mean those rules that lack a before/after -part and ones that only use string and possibly `any` items with concatenation. - -=== `run` call - -The run call runs a stage defined inside the document, or another map or -library. 
If this map isn't local, a map or library dependency needs to be -declared using the `dependency` call. - -For example: - -[source,ruby] ----- -stage { - # If dependency declared without import: true - run map.hangjamo.stage.main - # If dependency declated with import: true, or we reference a local stage - run stage.remove_spaces -} ----- - -=== Standard library functions - -There are certain conversions that may be hard to be achieved using stages, those -are implemented in respective standard libraries using programming languages. - -For a function named `title_case`, it can be called with the following: - -[source,ruby] ----- -stage { - title_case -} ----- - -A standard library function can take (named) arguments. Those are described in -the table below and they may be omitted if a default value is specified. - -==== List of standard library functions - -[options="header"] -|=== -| Function name | Arguments | Sample input | Sample output -| `title_case` | `word_separator: " "` | `"example string"` | `"Example String"` -| `downcase` | | `"HELLO WORLD"` | `"hello world"` -| `compose` | | `"ᄆ"+"ᅮ"` | `"무"` -| `decompose` | | `"무"` | `"ᄆ"+"ᅮ"` -| `separate` | `separator: " "` | `"こんいちは"` | `"こ ん い ち は"` -| `secryst` | `model:` 2+>| Consult: link:Usage_with_Secryst.adoc[Usage with Secryst]. -|=== - -== Items - -Interscript doesn't work purely on Strings, even though Strings are mostly -referenced to by this document. The items can be used in the `alias` and `stage` -context. - -=== String item - -The most basic kind of item. For example `"Г"` means "match Г" or "replace -with Г" depending on usage context. Some contexts will only accept strings, or -aliases to strings. - -=== `+` method - -Items can be concatenated (added together) to denote a complex item. For instance: -`any("ab") + "e"` means "either ae or be" and is equivalent to `any(["ae", "be"])`. - -=== `any` item - -Any denotes some alternative variations of a string. 
It has 3 forms of call: - -* `any("abcde")` - any character: a, b, c, d or e -* `any(["one", "two"])` - any string: one or two -* `any("a".."z")` - any character from a to z - -Any can be also used with other kinds of items than String, for instance: - -[source,ruby] ----- -stage { - sub any([line_start + "a", "a" + line_end]), none -} ----- - -=== `maybe`, `some`, `maybe_some` items - -If you want a given item to be allowed to be repeated respectively: 0 to 1 times, -1 to Infinity times, 0 to Infinity times, you can surround it with respectively: -`maybe()`, `some()`, `maybe_some()`. - -[source,ruby] ----- -stage { - sub "a"+maybe("-")+"b", "AB" # Equivalent to regexp: a-?b - sub "a"+some("-")+"b", "AB" # Equivalent to regexp: a-+b - sub "a"+maybe_some("-")+"b", "AB" # Equivalent to regexp: a-*b -} ----- - -=== `alias` item - -An alias item references an alias. For example `map.other_map.alias_from_other_map` -or simply `a_local_alias_or_an_alias_from_imported_library`. - -=== `capture` and `ref` items - -Sometimes there may be a need to reference a group from input inside output (or -input too). People who know regular expressions are familiar with expressions of -some form of `replace /(a)/, '[\1]'`. Interscript supports this kind of syntax: - -[source,ruby] ----- -stage { - sub capture(any("abc")), "["+ref(1)+"]" -} ----- - -When ran against a string `"abcde"`, this stage will produce an output of -`"[a][b][c]de"`. - -== Ending notes - -This document described everything Interscript currently supports, but it is -strongly advised to read the existing maps to get a grasp of how those -functionalities can be used best. diff --git a/docs/Maintainers.adoc b/docs/Maintainers.adoc deleted file mode 100644 index 878ebfcf..00000000 --- a/docs/Maintainers.adoc +++ /dev/null @@ -1,37 +0,0 @@ -= Maintainers - -This is a documentation intended for Interscript maintainers. It covers all possible tasks one -may need to execute. 
- -== Releasing - -The following is a set of steps one needs to make to do a release: - -[source,sh] ---- -# We assume this is executed in the main Interscript repository root directory. -# Adjust the V to reflect a correct version -V="2.1.0a1" -# Adjust the B to reflect a correct branch name. For now, master. In the future we may decide on -# how to do stable branches. -B="master" -# Commit command -COMMIT="git commit" -# Ensure we are on the latest repository version and all subrepos are up to date as well. -git checkout $B; git pull; git reset -pushd js; git checkout $B; git pull; git reset; popd -pushd maps; git checkout $B; git pull; git reset; popd -# This is the point when you may want to run tests and ensure everything is correct. -# Run the version update script -pushd ruby; bundle exec rake version[$V]; popd -# Add the new version to the submodules, commit it and tag it -pushd js; git add package.json; $COMMIT -m "Release v$V"; git tag "v$V"; popd -pushd maps; git add interscript-maps.gemspec; $COMMIT -m "Release v$V"; git tag "v$V"; popd -# Add the new version and submodules to the main repo, commit it and tag it -git add js maps ruby/lib/interscript/version.rb; $COMMIT -m "Release v$V"; git tag "v$V" -# Push everything in the correct order -pushd js; git push; git push --tags; popd -pushd maps; git push; git push --tags; popd -git push; git push --tags -# Our new version is released! ---- \ No newline at end of file diff --git a/docs/Map_Editing_Guide.adoc b/docs/Map_Editing_Guide.adoc deleted file mode 100644 index fa9091e1..00000000 --- a/docs/Map_Editing_Guide.adoc +++ /dev/null @@ -1,43 +0,0 @@ -= Interscript map editing guide - -Transliteration systems stored in a `maps/maps/` directory as Interscript Map files. -You can create a new file and add it to the directory. - -The file should be named as `.imp`, where `system-code` -is in accordance with -http://calconnect.gitlab.io/tc-localization/csd-transcription-systems[ISO/CC 24229]. 
- -== File structure - -The file structure is described in link:Interscript_Map_Format.adoc[Interscript Map Format documentation] - -== Testing transliteration systems - -To test all transliteration systems in the `maps/` directory, run: - -[source,sh] ----- -cd ruby/ -bundle exec rspec ----- - -The command takes `source` texts from the `test` section, transforms -them using `rules` and `charmaps` from the `map` key, and compares the -results with `expected:` text from the `source:` section. - -To test a specific transliteration system, set the environment variable -`TRANSLIT_SYSTEM` to the system code of the desired system -(i.e. the "`basename`" of the system's YAML file): - -[source,sh] ----- -TRANSLIT_SYSTEM=bgnpcgn-rus-Cyrl-Latn-1947 bundle exec rspec spec/interscript_spec.rb ----- - -To test staging maps, which may or may not work, you would need to execute -a slightly different command: - -[source,sh] ----- -TRANSLIT_SYSTEM=bgnpcgn-rus-Cyrl-Latn-1947 INTERSCRIPT_STAGING=1 bundle exec rspec spec/interscript_spec.rb ----- diff --git a/docs/Usage_with_Secryst.adoc b/docs/Usage_with_Secryst.adoc deleted file mode 100644 index 476faeae..00000000 --- a/docs/Usage_with_Secryst.adoc +++ /dev/null @@ -1,43 +0,0 @@ -= Usage with Secryst - -Secryst is a seq2seq transformer suited for transliteration. Written in Ruby. -It's installation is a bit tricky, you should consult its own installation guide -(https://github.com/secryst/secryst[at GitHub]). By default we don't use Secryst, -unless you have installed it. - -== Using it standalone - -It's enough to install it. Be sure to consult the guide above. - -== Integration with Ruby Applications - -In your Gemfile, add: - -[source,ruby] ----- -source "https://rubygems.org" - -gem "secryst" ----- - -Create a Secrystfile near your Gemfile with the following, for each model you -want to use in your application. 
Please consult our Secrystfile to get all the -maps needed to - -[source,ruby] ----- -model "model-name" ----- - -== Usage inside maps - -[source,ruby] ----- -stage { - # ... sub "a", "b" ... - secryst model: "model-name" - # ... sub "c", "d" ... -} ----- - -As of now, Secryst is usable only by the Ruby implementation. diff --git a/docs/demo/20191118-interscript-demo-cast.gif b/docs/demo/20191118-interscript-demo-cast.gif index fe881429..a2808c47 100644 Binary files a/docs/demo/20191118-interscript-demo-cast.gif and b/docs/demo/20191118-interscript-demo-cast.gif differ diff --git a/ruby/exe/interscript b/exe/interscript similarity index 100% rename from ruby/exe/interscript rename to exe/interscript diff --git a/ruby/interscript.gemspec b/interscript.gemspec similarity index 90% rename from ruby/interscript.gemspec rename to interscript.gemspec index 8bdf0ea1..f52349fc 100644 --- a/ruby/interscript.gemspec +++ b/interscript.gemspec @@ -27,5 +27,6 @@ Gem::Specification.new do |spec| spec.require_paths = ["lib"] spec.add_dependency "thor" - spec.add_dependency "interscript-maps" + spec.add_dependency "interscript-maps", "~> #{Interscript::VERSION.split('.')[0,2].join(".")}.0a" + spec.add_dependency "text" end diff --git a/js b/js deleted file mode 160000 index 003abf8d..00000000 --- a/js +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 003abf8dba03f1d7845ba92ea367191392cfba7e diff --git a/ruby/lib/interscript.rb b/lib/interscript.rb similarity index 50% rename from ruby/lib/interscript.rb rename to lib/interscript.rb index 2f02974d..21c9c2fc 100644 --- a/ruby/lib/interscript.rb +++ b/lib/interscript.rb @@ -2,7 +2,14 @@ require "yaml" module Interscript + # An error caused by a lack of some map class MapNotFoundError < StandardError; end + # An error caused by a missing dependency + class ExternalUtilError < StandardError; end + # An error caused by a particular compiler + class SystemConversionError < StandardError; end + # An error caused by an incorrect map 
implementation + class MapLogicError < StandardError; end class << self def load_path @@ -41,9 +48,9 @@ def transliterate_each(system_code, string, maps={}, &block) load(system_code, maps).(string, each: true, &block) end - def transliterate_file(system_code, input_file, output_file, maps={}) + def transliterate_file(system_code, input_file, output_file, maps={}, compiler: Interscript::Interpreter) input = File.read(input_file) - output = transliterate(system_code, input, maps) + output = transliterate(system_code, input, maps, compiler: compiler) File.open(output_file, 'w') do |f| f.puts(output) @@ -53,6 +60,16 @@ def transliterate_file(system_code, input_file, output_file, maps={}) output_file end + # Detects the transliteration that gives the most close approximation + # of transliterating source into destination. + # + # Set multiple: true to get a full report. + def detect(source, destination, **kwargs) + detector = Detector.new + detector.set_from_kwargs(**kwargs) + detector.(source, destination) + end + def map_gems @map_gems ||= Gem.find_latest_files('interscript-maps.yaml').map do |i| [i, YAML.load_file(i)] @@ -76,6 +93,57 @@ def secryst_index_locations end.compact.flatten end + def rababa_configs + @rababa_configs ||= map_gems.map do |i,v| + v["rababa-configs"] + end.compact.inject({}) do |a,b| + a.merge(b) + end + end + + # This code is borrowed from Secryst and should end up in Rababa, but for now, + # let's keep it here. + def rababa_provision(model_name, model_uri) + require 'fileutils' + require 'open-uri' + + # We provision the environment in the following way: + # First, we try the RABABA_DATA environment variable. If that's available, + # we use it to store the Rababa data we need. 
Otherwise, we try the following + # paths: + + possible_paths = [ + "/var/lib/rababa", + "/usr/local/share/rababa", + "/usr/share/rababa", + File.join(Dir.home, ".local/share/rababa") + ] + + # We find the first writable path + + write_path = nil + + ([ENV["RABABA_DATA"]] + possible_paths).compact.each do |path| + FileUtils.mkdir_p(path) + write_path = path + break + rescue + end + + raise ExternalUtilError, "Can't find a writable path for Rababa. Consider setting a RABABA_DATA environment variable" unless write_path + + model_path = "#{write_path}/model-#{model_name}.onnx" + + # Redownload every hour + if File.exist?(model_path) && File.mtime(model_path) + 3600 >= Time.now + return model_path + else + data = URI.open(model_uri, encoding: "BINARY").read + File.binwrite(model_path, data) + return model_path + end + end + def map_aliases return @map_aliases if @map_aliases @@ -99,6 +167,22 @@ def maps(basename: true, load_path: false, select: "*", libraries: false) basename ? imps.map { |j| File.basename(j, ".#{ext}") } : imps end + + # Removes the excluded maps for a given compiler and RUBY_PLATFORM. + # To be used by tests + # and builders. 
It uses the `skip` directive in interscript-maps.yaml + def exclude_maps(maps, compiler:, platform: true) + map_gems.each do |i,v| + [compiler.name, (Gem::Platform.local.os if platform)].compact.each do |name| + skips = v.dig('skip', name) || [] + skips.each do |skip| + skip_re = /#{Regexp.escape(skip).gsub("\\*", ".*?")}/ + maps = maps.grep_v(skip_re) + end + end + end + maps + end end end @@ -109,3 +193,5 @@ def maps(basename: true, load_path: false, select: "*", libraries: false) require 'interscript/dsl' require 'interscript/node' + +require 'interscript/detector' diff --git a/lib/interscript/command.rb b/lib/interscript/command.rb new file mode 100644 index 00000000..aa017e27 --- /dev/null +++ b/lib/interscript/command.rb @@ -0,0 +1,79 @@ +require 'thor' +require 'interscript' +require 'json' + +module Interscript + # Command line interface + class Command < Thor + desc '', 'Transliterate text' + option :system, aliases: '-s', required: true, desc: 'Transliteration system' + option :output, aliases: '-o', required: false, desc: 'Output file' + option :compiler, aliases: '-c', required: false, desc: 'Compiler (eg. Interscript::Compiler::Python)' + # Was this option really well thought out? The last parameter is a cache, isn't it? 
+ #option :map, aliases: '-m', required: false, default: "{}", desc: 'Transliteration mapping json' + + def translit(input) + compiler = if options[:compiler] + compiler = options[:compiler].split("::").last.downcase + require "interscript/compiler/#{compiler}" + Object.const_get(options[:compiler]) + else + Interscript::Interpreter + end + + if options[:output] + Interscript.transliterate_file(options[:system], input, options[:output], compiler: compiler) + else + puts Interscript.transliterate(options[:system], IO.read(input), compiler: compiler) + end + end + + desc 'list', 'Prints allowed transliteration systems' + def list + Interscript.maps(load_path: true).each do |path| + puts path + end + end + + desc 'stats', 'Prints statistics about the maps we have' + def stats + maps = Interscript.maps(load_path: true) + parsed_maps = maps.map { |i| [i, Interscript.parse(i)] }.to_h + maps_by_rule_count = parsed_maps.transform_values do |map| + map.stages.values.map { |i| i.children.map { |j| j.is_a?(Interscript::Node::Group) ? 
j.children : j } }.flatten.count + end + + authorities, languages, source_scripts, target_scripts = 4.times.map do |i| + maps.group_by { |map| map.split('-')[i] } + end + + puts <<~END + Languages supported: #{languages.keys.count} + Source scripts supported: #{source_scripts.keys.count} + Target scripts supported: #{target_scripts.keys.count} + Authorities supported: #{authorities.keys.count} + Total number of rules in Interscript: #{maps_by_rule_count.values.sum} + + END + + authorities.each do |auth, auth_maps| + rule_counts = auth_maps.map { |i| maps_by_rule_count[i] } + puts <<~END + Authority #{auth}: + * Conversion systems: #{auth_maps.count} + * Total number of rules: #{rule_counts.sum} + + END + end + + puts <<~END + Interesting facts: + * #{maps_by_rule_count.max_by { |i| i.last }.first} has the most rules + * Authority #{authorities.max_by { |i| i.last.count }.first} has the most systems + * Language #{languages.max_by { |i| i.last.count }.first} has the most systems + * Source script #{source_scripts.max_by { |i| i.last.count }.first} has the most systems + * Target script #{target_scripts.max_by { |i| i.last.count }.first} has the most systems + END + end + end +end diff --git a/ruby/lib/interscript/compiler.rb b/lib/interscript/compiler.rb similarity index 100% rename from ruby/lib/interscript/compiler.rb rename to lib/interscript/compiler.rb diff --git a/ruby/lib/interscript/compiler/javascript.rb b/lib/interscript/compiler/javascript.rb similarity index 84% rename from ruby/lib/interscript/compiler/javascript.rb rename to lib/interscript/compiler/javascript.rb index 13fe6eea..07a79ed4 100644 --- a/ruby/lib/interscript/compiler/javascript.rb +++ b/lib/interscript/compiler/javascript.rb @@ -53,6 +53,7 @@ def parallel_regexp_compile(subs_hash) def compile_rule(r, map = @map, wrapper = false) c = "" + return c if r.reverse_run == true case r when Interscript::Node::Stage c += "map.stages.#{r.name} = function(s) {\n" @@ -69,12 +70,13 @@ def 
compile_rule(r, map = @map, wrapper = false) # Try to build a tree a = [] r.children.each do |i| - raise ArgumentError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i - raise ArgumentError, "Can't parallelize rules with :before" if i.before - raise ArgumentError, "Can't parallelize rules with :after" if i.after - raise ArgumentError, "Can't parallelize rules with :not_before" if i.not_before - raise ArgumentError, "Can't parallelize rules with :not_after" if i.not_after + raise Interscript::SystemConversionError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i + raise Interscript::SystemConversionError, "Can't parallelize rules with :before" if i.before + raise Interscript::SystemConversionError, "Can't parallelize rules with :after" if i.after + raise Interscript::SystemConversionError, "Can't parallelize rules with :not_before" if i.not_before + raise Interscript::SystemConversionError, "Can't parallelize rules with :not_after" if i.not_after + next if i.reverse_run == true a << [compile_item(i.from, map, :par), compile_item(i.to, map, :parstr)] end ah = a.hash.abs @@ -87,8 +89,9 @@ def compile_rule(r, map = @map, wrapper = false) # Otherwise let's build a megaregexp a = [] Interscript::Stdlib.deterministic_sort_by_max_length(r.children).each do |i| - raise ArgumentError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i - + raise Interscript::SystemConversionError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i + + next if i.reverse_run == true a << [build_regexp(i, map), compile_item(i.to, map, :parstr)] end ah = a.hash.abs @@ -102,6 +105,8 @@ def compile_rule(r, map = @map, wrapper = false) from = %{"#{build_regexp(r, map).gsub("/", "\\\\/")}"} if r.to == :upcase to = 'function(a){return a.toUpperCase();}' + elsif r.to == :downcase + to = 'function(a){return a.toLowerCase();}' else to = compile_item(r.to, map, :str) end @@ -117,7 +122,7 @@ def compile_rule(r, 
map = @map, wrapper = false) end c += "s = Interscript.transliterate(#{stage.doc_name.to_json}, s, #{stage.name.to_json});\n" else - raise ArgumentError, "Can't compile unhandled #{r.class}" + raise Interscript::SystemConversionError, "Can't compile unhandled #{r.class}" end c end @@ -152,17 +157,17 @@ def compile_item i, doc=@map, target=nil astr = if i.map d = doc.dep_aliases[i.map].document a = d.imported_aliases[i.name] - raise ArgumentError, "Alias #{i.name} of #{i.stage.map} not found" unless a + raise Interscript::SystemConversionError, "Alias #{i.name} of #{i.stage.map} not found" unless a "Interscript.get_alias_ALIASTYPE(#{a.doc_name.to_json}, #{a.name.to_json})" elsif Interscript::Stdlib::ALIASES.include?(i.name) if target != :re && Interscript::Stdlib.re_only_alias?(i.name) - raise ArgumentError, "Can't use #{i.name} in a #{target} context" + raise Interscript::SystemConversionError, "Can't use #{i.name} in a #{target} context" end stdlib_alias = true "Interscript.aliases.#{i.name}" else a = doc.imported_aliases[i.name] - raise ArgumentError, "Alias #{i.name} not found" unless a + raise Interscript::SystemConversionError, "Alias #{i.name} not found" unless a "Interscript.get_alias_ALIASTYPE(#{a.doc_name.to_json}, #{a.name.to_json})" end @@ -200,7 +205,7 @@ def compile_item i, doc=@map, target=nil end when Interscript::Node::Item::CaptureGroup if target != :re - raise ArgumentError, "Can't use a CaptureGroup in a #{target} context" + raise Interscript::SystemConversionError, "Can't use a CaptureGroup in a #{target} context" end "(" + compile_item(i.data, doc, target) + ")" when Interscript::Node::Item::Maybe, @@ -212,7 +217,7 @@ def compile_item i, doc=@map, target=nil Interscript::Node::Item::MaybeSome => "*" }[i.class] if target == :par - raise ArgumentError, "Can't use a MaybeSome in a #{target} context" + raise Interscript::SystemConversionError, "Can't use a MaybeSome in a #{target} context" end if Interscript::Node::Item::String === i.data && 
i.data.data.length != 1 "(?:" + compile_item(i.data, doc, target) + ")" + resuffix @@ -221,7 +226,7 @@ def compile_item i, doc=@map, target=nil end when Interscript::Node::Item::CaptureRef if target == :par - raise ArgumentError, "Can't use CaptureRef in parallel mode" + raise Interscript::SystemConversionError, "Can't use CaptureRef in parallel mode" elsif target == :re "\\\\#{i.id}" elsif target == :str @@ -229,7 +234,7 @@ def compile_item i, doc=@map, target=nil end when Interscript::Node::Item::Any if target == :str - raise ArgumentError, "Can't use Any in a string context" # A linter could find this! + raise Interscript::SystemConversionError, "Can't use Any in a string context" # A linter could find this! elsif target == :par i.data.map(&:data) elsif target == :re diff --git a/lib/interscript/compiler/python.rb b/lib/interscript/compiler/python.rb new file mode 100644 index 00000000..13722091 --- /dev/null +++ b/lib/interscript/compiler/python.rb @@ -0,0 +1,331 @@ +require 'pycall' + +class Interscript::Compiler::Python < Interscript::Compiler + def escape(val) + case val + when String, Integer + val.inspect + when Symbol + val.to_s.inspect + when Hash + "{"+ + val.map { |k,v| "#{escape k}:#{escape v}" }.join(",")+ + "}" + when Array + "[" + val.map { |i| escape i }.join(",") + "]" + when nil + "None" + else + pp [:error, val] + exit! 
+ end + end + + def re_escape(val) + @pycall_regex ||= PyCall.import_module("regex") + @pycall_regex.escape(val).gsub("\\", "\\\\\\\\").gsub('"', "\\\\\"") + end + + def new_regexp(str) + "re.compile(\"#{str}\", re.MULTILINE)" + end + + def indent + @indent += 4 + yield + @indent -= 4 + end + + def emit(code) + @code << (" " * @indent) << code << "\n" + code + end + + def compile(map, debug: false) + @indent = 0 + @map = map + @debug = debug + @parallel_trees = {} + @parallel_regexps = {} + @code = "" + emit "import interscript" + emit "import regex as re" + map.dependencies.map(&:full_name).each do |dep| + emit "interscript.load_map(#{escape dep})" + end + + emit "interscript.stdlib.define_map(#{escape map.name})" + + map.aliases.each do |name, value| + val = compile_item(value.data, map, :str) + emit "interscript.stdlib.add_map_alias(#{escape map.name}, #{escape name}, #{val})" + val = "\"" + compile_item(value.data, map, :re) + "\"" + emit "interscript.stdlib.add_map_alias_re(#{escape map.name}, #{escape name}, #{val})" + end + + map.stages.each do |_, stage| + compile_rule(stage, @map, true) + end + @parallel_trees.each do |k,v| + emit "_PTREE_#{k} = #{escape v}" + end + @parallel_regexps.each do |k,v| + v = %{["#{v[0]}", #{escape v[1]}]} + emit "_PRE_#{k} = #{v}" + end + end + + def parallel_regexp_compile(subs_hash) + # puts subs_hash.inspect + regexp = subs_hash.each_with_index.map do |p,i| + "(?P<_%d>%s)" % [i,p[0]] + end.join("|") + subs_regexp = regexp + # puts subs_regexp.inspect + end + + def compile_rule(r, map = @map, wrapper = false) + return if r.reverse_run == true + case r + when Interscript::Node::Stage + if @debug + emit "if not hasattr(interscript, 'map_debug'):" + indent { emit "interscript.map_debug = []" } + end + emit "def _stage_#{r.name}(s):" + indent do + r.children.each do |t| + comp = compile_rule(t, map) + emit %{interscript.map_debug.append([s, #{escape @map.name.to_s}, #{escape r.name.to_s}, #{escape t.inspect}, #{escape comp}])} if 
@debug + end + emit "return s\n" + end + emit "interscript.stdlib.add_map_stage(#{escape @map.name}, #{escape r.name}, _stage_#{r.name})" + when Interscript::Node::Group::Parallel + begin + # Try to build a tree + a = [] + r.children.each do |i| + raise Interscript::SystemConversionError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i + raise Interscript::SystemConversionError, "Can't parallelize rules with :before" if i.before + raise Interscript::SystemConversionError, "Can't parallelize rules with :after" if i.after + raise Interscript::SystemConversionError, "Can't parallelize rules with :not_before" if i.not_before + raise Interscript::SystemConversionError, "Can't parallelize rules with :not_after" if i.not_after + + next if i.reverse_run == true + a << [compile_item(i.from, map, :par), compile_item(i.to, map, :parstr)] + end + ah = a.hash.abs + unless @parallel_trees.include? ah + tree = Interscript::Stdlib.parallel_replace_compile_tree(a) + @parallel_trees[ah] = tree + end + emit "s = interscript.stdlib.parallel_replace_tree(s, _PTREE_#{ah})" + rescue + # Otherwise let's build a megaregexp + a = [] + Interscript::Stdlib.deterministic_sort_by_max_length(r.children).each do |i| + raise Interscript::SystemConversionError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i + + next if i.reverse_run == true + a << [build_regexp(i, map), compile_item(i.to, map, :parstr)] + end + ah = a.hash.abs + unless @parallel_regexps.include? 
ah + re = parallel_regexp_compile(a) + @parallel_regexps[ah] = [re, a.map(&:last)] + end + emit "s = interscript.stdlib.parallel_regexp_gsub(s, *_PRE_#{ah})" + end + when Interscript::Node::Rule::Sub + from = new_regexp build_regexp(r, map) + if r.to == :upcase + to = 'interscript.stdlib.upper' + elsif r.to == :downcase + to = 'interscript.stdlib.lower' + else + to = compile_item(r.to, map, :str) + end + emit "s = #{from}.sub(#{to}, s)" + when Interscript::Node::Rule::Funcall + emit "s = interscript.functions.#{r.name}(s, #{escape r.kwargs})" + when Interscript::Node::Rule::Run + if r.stage.map + doc = map.dep_aliases[r.stage.map].document + stage = doc.imported_stages[r.stage.name] + else + stage = map.imported_stages[r.stage.name] + end + emit "s = interscript.transliterate(#{escape stage.doc_name}, s, #{escape stage.name})" + else + raise Interscript::SystemConversionError, "Can't compile unhandled #{r.class}" + end + end + + def build_regexp(r, map=@map) + from = compile_item(r.from, map, :re) + before = compile_item(r.before, map, :re) if r.before + after = compile_item(r.after, map, :re) if r.after + not_before = compile_item(r.not_before, map, :re) if r.not_before + not_after = compile_item(r.not_after, map, :re) if r.not_after + + re = "" + re += "(?<=#{before})" if before + re += "(? "?" 
, + Interscript::Node::Item::Some => "+" , + Interscript::Node::Item::MaybeSome => "*" }[i.class] + + if target == :par + raise Interscript::SystemConversionError, "Can't use a Maybe in a #{target} context" + end + if Interscript::Node::Item::String === i.data && i.data.data.length != 1 + "(?:" + compile_item(i.data, doc, target) + ")" + resuffix + else + compile_item(i.data, doc, target) + resuffix + end + when Interscript::Node::Item::CaptureRef + if target == :par + raise Interscript::SystemConversionError, "Can't use CaptureRef in parallel mode" + elsif target == :re + "\\\\#{i.id}" + elsif target == :str + "\"\\\\#{i.id}\"" + end + when Interscript::Node::Item::Any + if target == :str + raise Interscript::SystemConversionError, "Can't use Any in a string context" # A linter could find this! + elsif target == :par + i.data.map(&:data) + elsif target == :re + case i.value + when Array + data = i.data.map { |j| compile_item(j, doc, target) } + "(?:"+data.join("|")+")" + when String + "[#{re_escape(i.value)}]" + when Range + "[#{re_escape(i.value.first)}-#{re_escape(i.value.last)}]" + end + end + end + end + + @maps_loaded = {} + @ctx = nil + class << self + attr_accessor :maps_loaded + attr_accessor :ctx + end + + def load + if !self.class.maps_loaded[@map.name] + @map.dependencies.each do |dep| + dep = dep.full_name + if !self.class.maps_loaded[dep] + Interscript.load(dep, compiler: self.class).load + end + end + + ctx = self.class.ctx + python_src_path = File.join(__dir__, '..', '..', '..', '..', 'python', 'src') + unless ctx + PyCall.sys.path.append(python_src_path) + self.class.ctx = PyCall.import_module("interscript") + end + #puts @code + Dir.mkdir("#{python_src_path}/interscript/maps") rescue nil + File.write("#{python_src_path}/interscript/maps/#{@map.name}.py", @code) + self.class.ctx.load_map(@map.name) + + self.class.maps_loaded[@map.name] = true + end + end + + def call(str, stage=:main) + load + self.class.ctx.transliterate(@map.name, str, 
stage.to_s) + end + + def self.read_debug_data + (ctx['map_debug'] || []).map(&:to_a).to_a + end + + def self.reset_debug_data + ctx['map_debug'].clear + end +end diff --git a/ruby/lib/interscript/compiler/ruby.rb b/lib/interscript/compiler/ruby.rb similarity index 84% rename from ruby/lib/interscript/compiler/ruby.rb rename to lib/interscript/compiler/ruby.rb index 74d78168..dcdefcdf 100644 --- a/ruby/lib/interscript/compiler/ruby.rb +++ b/lib/interscript/compiler/ruby.rb @@ -42,6 +42,7 @@ def compile(map, debug: false) def compile_rule(r, map = @map, wrapper = false) c = "" + return c if r.reverse_run == true case r when Interscript::Node::Stage c += "Interscript::Maps.add_map_stage \"#{@map.name}\", #{r.name.inspect} do |s|\n" @@ -59,12 +60,13 @@ def compile_rule(r, map = @map, wrapper = false) # Try to build a tree a = [] r.children.each do |i| - raise ArgumentError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i - raise ArgumentError, "Can't parallelize rules with :before" if i.before - raise ArgumentError, "Can't parallelize rules with :after" if i.after - raise ArgumentError, "Can't parallelize rules with :not_before" if i.not_before - raise ArgumentError, "Can't parallelize rules with :not_after" if i.not_after + raise Interscript::SystemConversionError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i + raise Interscript::SystemConversionError, "Can't parallelize rules with :before" if i.before + raise Interscript::SystemConversionError, "Can't parallelize rules with :after" if i.after + raise Interscript::SystemConversionError, "Can't parallelize rules with :not_before" if i.not_before + raise Interscript::SystemConversionError, "Can't parallelize rules with :not_after" if i.not_after + next if i.reverse_run == true a << [compile_item(i.from, map, :par), compile_item(i.to, map, :parstr)] end ah = a.hash.abs @@ -77,8 +79,9 @@ def compile_rule(r, map = @map, wrapper = false) # Otherwise let's build a 
megaregexp a = [] Interscript::Stdlib.deterministic_sort_by_max_length(r.children).each do |i| - raise ArgumentError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i + raise Interscript::SystemConversionError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i + next if i.reverse_run == true a << [build_regexp(i, map), compile_item(i.to, map, :parstr)] end ah = a.hash.abs @@ -92,6 +95,8 @@ def compile_rule(r, map = @map, wrapper = false) from = "/#{build_regexp(r, map).gsub("/", "\\\\/")}/" if r.to == :upcase to = '&:upcase' + elsif r.to == :downcase + to = '&:downcase' else to = compile_item(r.to, map, :str) end @@ -107,7 +112,7 @@ def compile_rule(r, map = @map, wrapper = false) end c += "s = Interscript::Maps.transliterate(#{stage.doc_name.inspect}, s, #{stage.name.inspect})\n" else - raise ArgumentError, "Can't compile unhandled #{r.class}" + raise Interscript::SystemConversionError, "Can't compile unhandled #{r.class}" end c end @@ -141,17 +146,17 @@ def compile_item i, doc=@map, target=nil astr = if i.map d = doc.dep_aliases[i.map].document a = d.imported_aliases[i.name] - raise ArgumentError, "Alias #{i.name} of #{i.stage.map} not found" unless a + raise Interscript::SystemConversionError, "Alias #{i.name} of #{i.stage.map} not found" unless a "Interscript::Maps.get_alias_ALIASTYPE(#{a.doc_name.inspect}, #{a.name.inspect})" elsif Interscript::Stdlib::ALIASES.include?(i.name) if target != :re && Interscript::Stdlib.re_only_alias?(i.name) - raise ArgumentError, "Can't use #{i.name} in a #{target} context" + raise Interscript::SystemConversionError, "Can't use #{i.name} in a #{target} context" end stdlib_alias = true "Interscript::Stdlib::ALIASES[#{i.name.inspect}]" else a = doc.imported_aliases[i.name] - raise ArgumentError, "Alias #{i.name} not found" unless a + raise Interscript::SystemConversionError, "Alias #{i.name} not found" unless a "Interscript::Maps.get_alias_ALIASTYPE(#{a.doc_name.inspect}, 
#{a.name.inspect})" end @@ -189,7 +194,7 @@ def compile_item i, doc=@map, target=nil end when Interscript::Node::Item::CaptureGroup if target != :re - raise ArgumentError, "Can't use a CaptureGroup in a #{target} context" + raise Interscript::SystemConversionError, "Can't use a CaptureGroup in a #{target} context" end "(" + compile_item(i.data, doc, target) + ")" when Interscript::Node::Item::Maybe, @@ -201,7 +206,7 @@ def compile_item i, doc=@map, target=nil Interscript::Node::Item::MaybeSome => "*" }[i.class] if target == :par - raise ArgumentError, "Can't use a Maybe in a #{target} context" + raise Interscript::SystemConversionError, "Can't use a Maybe in a #{target} context" end if Interscript::Node::Item::String === i.data && i.data.data.length != 1 "(?:" + compile_item(i.data, doc, target) + ")" + resuffix @@ -210,7 +215,7 @@ def compile_item i, doc=@map, target=nil end when Interscript::Node::Item::CaptureRef if target == :par - raise ArgumentError, "Can't use CaptureRef in parallel mode" + raise Interscript::SystemConversionError, "Can't use CaptureRef in parallel mode" elsif target == :re "\\#{i.id}" elsif target == :str @@ -218,7 +223,7 @@ def compile_item i, doc=@map, target=nil end when Interscript::Node::Item::Any if target == :str - raise ArgumentError, "Can't use Any in a string context" # A linter could find this! + raise Interscript::SystemConversionError, "Can't use Any in a string context" # A linter could find this! 
elsif target == :par i.data.map(&:data) elsif target == :re diff --git a/lib/interscript/detector.rb b/lib/interscript/detector.rb new file mode 100644 index 00000000..b22f43cf --- /dev/null +++ b/lib/interscript/detector.rb @@ -0,0 +1,62 @@ +require "text" + +class Interscript::Detector + attr_accessor :compiler + attr_accessor :distance_computer + attr_accessor :map_pattern + + # TODO: use transliterate_each + attr_accessor :each + + attr_accessor :load_path + attr_accessor :cache + + # Returns a summary of all detected transliterations + attr_accessor :multiple + + def initialize + @compiler = Interscript::Interpreter + @distance_computer = DistanceComputer::Levenshtein + @map_pattern = "*" + + @each = false + + @load_path = false + @cache = CACHE + end + + def set_from_kwargs(**kwargs) + kwargs.each do |k,v| + self.public_send(:"#{k}=", v) + end + end + + def call(source, destination) + maps = Interscript.maps(select: @map_pattern, load_path: @load_path) + maps = Interscript.exclude_maps(maps, compiler: self.class) + maps = Interscript.exclude_maps(maps, compiler: @compiler) + + summary = maps.map do |map| + try_dest = Interscript.transliterate(map, source, compiler: @compiler) + + [map, try_dest] + end.map do |map, try_dest| + dist = @distance_computer.(try_dest, destination) + + [map, dist] + end.sort_by(&:last).to_h + + if @multiple + summary.to_h + else + summary.first.first + end + end + + CACHE = {} + + # A DistanceComputer needs to respond to #call(source, destination) + module DistanceComputer + Levenshtein = Text::Levenshtein.method(:distance) + end +end \ No newline at end of file diff --git a/ruby/lib/interscript/dsl.rb b/lib/interscript/dsl.rb similarity index 53% rename from ruby/lib/interscript/dsl.rb rename to lib/interscript/dsl.rb index bf1658c2..60301454 100644 --- a/ruby/lib/interscript/dsl.rb +++ b/lib/interscript/dsl.rb @@ -2,12 +2,45 @@ module Interscript::DSL @cache = {} - def self.parse(map_name) + def self.parse(map_name, reverse: true) 
# map name aliases? here may be a place to wrap it return @cache[map_name] if @cache[map_name] - path = Interscript.locate(map_name) + + # This is a composition, so let's make a new virtual map + # that calls all maps in a sequence. + if map_name.include? "|" + map_parts = map_name.split("|").map(&:strip) + + doc = Interscript::DSL::Document.new(map_name) do + map_parts.each_with_index do |i, idx| + dependency i, as: :"part#{idx}" + end + + stage { + map_parts.each_with_index do |i, idx| + run map[:"part#{idx}"].stage.main + end + } + end.node + + return @cache[map_name] = doc + end + + path = begin + Interscript.locate(map_name) + rescue Interscript::MapNotFoundError => e + # But maybe we called the map in a reversed fashion? + begin + raise e if reverse == false # Protect from an infinite loop + reverse_name = Interscript::Node::Document.reverse_name(map_name) + return @cache[map_name] = parse(reverse_name, reverse: false).reverse + rescue Interscript::MapNotFoundError + raise e + end + end library = path.end_with?(".iml") + map_name = File.basename(path, ".imp") map_name = File.basename(map_name, ".iml") @@ -34,7 +67,7 @@ def self.parse(map_name) ruby << l end end - raise ArgumentError, "metadata stage isn't terminated" if md_reading + raise Interscript::MapLogicError, "metadata stage isn't terminated" if md_reading ruby, yaml = ruby.join("\n"), yaml.join("\n") obj = Interscript::DSL::Document.new(map_name) @@ -43,7 +76,12 @@ def self.parse(map_name) yaml = if yaml =~ /\A\s*\z/ {} else - YAML.load(yaml, exc_fname) + unsafe_load = if YAML.respond_to? 
:unsafe_load + :unsafe_load + else + :load + end + YAML.public_send(unsafe_load, yaml, filename: exc_fname) end md = Interscript::DSL::Metadata.new(yaml: true, map_name: map_name, library: library) do diff --git a/ruby/lib/interscript/dsl/aliases.rb b/lib/interscript/dsl/aliases.rb similarity index 82% rename from ruby/lib/interscript/dsl/aliases.rb rename to lib/interscript/dsl/aliases.rb index 62e6af65..da9c831f 100644 --- a/ruby/lib/interscript/dsl/aliases.rb +++ b/lib/interscript/dsl/aliases.rb @@ -14,7 +14,7 @@ def def_alias(name, value) end unless Symbol === name - raise TypeError, "Alias name must be a Symbol, given #{name.class}" + raise Interscript::SystemConversionError, "Alias name must be a Symbol, given #{name.class}" end puts "def_alias(#{name.inspect}, #{thing.inspect})" if $DEBUG diff --git a/ruby/lib/interscript/dsl/document.rb b/lib/interscript/dsl/document.rb similarity index 92% rename from ruby/lib/interscript/dsl/document.rb rename to lib/interscript/dsl/document.rb index cddd7d75..89f35c98 100644 --- a/ruby/lib/interscript/dsl/document.rb +++ b/lib/interscript/dsl/document.rb @@ -37,10 +37,11 @@ def dependency(full_name, **kargs) @node.dep_aliases[dep.name] = dep if dep.name end - def stage(name = :main, &block) + def stage(name = :main, dont_reverse: false, &block) puts "stage(#{name}) from #{self.inspect}" if $DEBUG stage = Interscript::DSL::Stage.new(name, &block) stage.node.doc_name = @node.name + stage.node.dont_reverse = dont_reverse @node.stages[name] = stage.node end end diff --git a/ruby/lib/interscript/dsl/group.rb b/lib/interscript/dsl/group.rb similarity index 69% rename from ruby/lib/interscript/dsl/group.rb rename to lib/interscript/dsl/group.rb index c5281db5..1283a16f 100644 --- a/ruby/lib/interscript/dsl/group.rb +++ b/lib/interscript/dsl/group.rb @@ -8,16 +8,16 @@ def initialize(&block) self.instance_exec(&block) end - def run(stage) + def run(stage, **kwargs) if stage.class != Interscript::Node::Item::Stage - raise 
TypeError, "I::Node::Item::Stage expected, got #{stage.class}" + raise Interscript::MapLogicError, "I::Node::Item::Stage expected, got #{stage.class}" end - @node.children << Interscript::Node::Rule::Run.new(stage) + @node.children << Interscript::Node::Rule::Run.new(stage, **kwargs) end def sub(from, to, **kwargs, &block) - puts "sub(#{from.inspect},#{to}, kargs = #{ - kargs.inspect + puts "sub(#{from.inspect},#{to}, kwargs = #{ + kwargs.inspect }) from #{self.inspect}" if $DEBUG rule = Interscript::Node::Rule::Sub.new(from, to, **kwargs) @@ -25,6 +25,7 @@ def sub(from, to, **kwargs, &block) end def upcase; :upcase; end + def downcase; :downcase; end Interscript::Stdlib.available_functions.each do |fun| define_method fun do |**kwargs| @@ -35,9 +36,9 @@ def upcase; :upcase; end end end - def parallel(&block) + def parallel(**kwargs, &block) puts "parallel(#{chars.inspect}) from #{self.inspect}" if $DEBUG - group = Interscript::DSL::Group::Parallel.new(&block) + group = Interscript::DSL::Group::Parallel.new(**kwargs, &block) @node.children << group.node end end diff --git a/lib/interscript/dsl/group/parallel.rb b/lib/interscript/dsl/group/parallel.rb new file mode 100644 index 00000000..d19dbff3 --- /dev/null +++ b/lib/interscript/dsl/group/parallel.rb @@ -0,0 +1,6 @@ +class Interscript::DSL::Group::Parallel < Interscript::DSL::Group + def initialize(reverse_run: nil, &block) + @node = Interscript::Node::Group::Parallel.new(reverse_run: reverse_run) + self.instance_exec(&block) + end +end diff --git a/ruby/lib/interscript/dsl/items.rb b/lib/interscript/dsl/items.rb similarity index 90% rename from ruby/lib/interscript/dsl/items.rb rename to lib/interscript/dsl/items.rb index ef286128..6b80d64b 100644 --- a/ruby/lib/interscript/dsl/items.rb +++ b/lib/interscript/dsl/items.rb @@ -55,7 +55,7 @@ module Maps class << self # Select a remote map def [] map - Symbol === map or raise TypeError, "A map name must be a Symbol, not #{alias_name.class}" + Symbol === map or raise 
Interscript::MapLogicError, "A map name must be a Symbol, not #{alias_name.class}" Map.new(map) end alias method_missing [] @@ -68,7 +68,7 @@ def initialize name; @name = name; end # Implementation of `map.x.aliasname` def [] alias_name - Symbol === alias_name or raise TypeError, "An alias name must be a Symbol, not #{alias_name.class}" + Symbol === alias_name or raise Interscript::MapLogicError, "An alias name must be a Symbol, not #{alias_name.class}" Interscript::Node::Item::Alias.new(alias_name, map: @name) end alias method_missing [] diff --git a/ruby/lib/interscript/dsl/metadata.rb b/lib/interscript/dsl/metadata.rb similarity index 82% rename from ruby/lib/interscript/dsl/metadata.rb rename to lib/interscript/dsl/metadata.rb index 7feb4b59..85c19497 100644 --- a/ruby/lib/interscript/dsl/metadata.rb +++ b/lib/interscript/dsl/metadata.rb @@ -4,7 +4,7 @@ class Interscript::DSL::Metadata attr_accessor :node def initialize(yaml: false, map_name: "", library: true, &block) - raise ArgumentError, "Can't evaluate metadata from Ruby context" unless yaml + raise Interscript::MapLogicError, "Can't evaluate metadata from Ruby context" unless yaml @map_name = map_name @node = Interscript::Node::MetaData.new self.instance_exec(&block) @@ -20,13 +20,12 @@ def initialize(yaml: false, map_name: "", library: true, &block) STANDARD_STRING_KEYS = %i{authority_id id language source_script destination_script - name url creation_date adoption_date description - character source confirmation_date} + name creation_date adoption_date description + character source confirmation_date original_description} - STANDARD_ARRAY_KEYS = %i{notes} + STANDARD_ARRAY_KEYS = %i{notes implementation_notes original_notes url} - NONSTANDARD_KEYS = %i{special_rules original_description original_notes - implementation_notes} + NONSTANDARD_KEYS = %i{special_rules} NECESSARY_KEYS = %i{name language source_script destination_script} diff --git a/ruby/lib/interscript/dsl/stage.rb 
b/lib/interscript/dsl/stage.rb similarity index 100% rename from ruby/lib/interscript/dsl/stage.rb rename to lib/interscript/dsl/stage.rb diff --git a/ruby/lib/interscript/dsl/symbol_mm.rb b/lib/interscript/dsl/symbol_mm.rb similarity index 100% rename from ruby/lib/interscript/dsl/symbol_mm.rb rename to lib/interscript/dsl/symbol_mm.rb diff --git a/ruby/lib/interscript/dsl/tests.rb b/lib/interscript/dsl/tests.rb similarity index 68% rename from ruby/lib/interscript/dsl/tests.rb rename to lib/interscript/dsl/tests.rb index 64bf195b..601611b4 100644 --- a/ruby/lib/interscript/dsl/tests.rb +++ b/lib/interscript/dsl/tests.rb @@ -6,7 +6,7 @@ def initialize(&block) self.instance_exec(&block) end - def test(from,to) - @node << [from, to] + def test(from, to, reverse_run: nil) + @node << [from, to, reverse_run] end end diff --git a/ruby/lib/interscript/interpreter.rb b/lib/interscript/interpreter.rb similarity index 82% rename from ruby/lib/interscript/interpreter.rb rename to lib/interscript/interpreter.rb index 35da6235..7db1dfb2 100644 --- a/ruby/lib/interscript/interpreter.rb +++ b/lib/interscript/interpreter.rb @@ -76,6 +76,7 @@ def initialize(map, str) end def execute_rule r + return if r.reverse_run == true case r when Interscript::Node::Group::Parallel if r.cached_tree @@ -91,11 +92,12 @@ def execute_rule r # Try to build a tree subs_array = [] r.children.each do |i| - raise ArgumentError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i - raise ArgumentError, "Can't parallelize rules with :before" if i.before - raise ArgumentError, "Can't parallelize rules with :after" if i.after - raise ArgumentError, "Can't parallelize rules with :not_before" if i.not_before - raise ArgumentError, "Can't parallelize rules with :not_after" if i.not_after + raise Interscript::SystemConversionError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i + raise Interscript::SystemConversionError, "Can't parallelize rules with :before" if 
i.before + raise Interscript::SystemConversionError, "Can't parallelize rules with :after" if i.after + raise Interscript::SystemConversionError, "Can't parallelize rules with :not_before" if i.not_before + raise Interscript::SystemConversionError, "Can't parallelize rules with :not_after" if i.not_after + next if i.reverse_run == true subs_array << [build_item(i.from, :par), build_item(i.to, :parstr)] end tree = Interscript::Stdlib.parallel_replace_compile_tree(subs_array) #.sort_by{|k,v| -k.length}) @@ -107,8 +109,8 @@ def execute_rule r # Otherwise let's build a megaregexp subs_array = [] Interscript::Stdlib.deterministic_sort_by_max_length(r.children).each do |i| # rule.from.max_length gives somewhat better test results, why is that - raise ArgumentError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i - + raise Interscript::SystemConversionError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i + next if i.reverse_run == true subs_array << [build_regexp(i), build_item(i.to, :parstr)] end r.subs_regexp = Interscript::Stdlib.parallel_regexp_compile(subs_array) @@ -129,6 +131,8 @@ def execute_rule r when Interscript::Node::Rule::Sub if r.to == :upcase @str = @str.gsub(Regexp.new(build_regexp(r)), &:upcase) + elsif r.to == :downcase + @str = @str.gsub(Regexp.new(build_regexp(r)), &:downcase) else @str = @str.gsub(Regexp.new(build_regexp(r)), build_item(r.to, :str)) end @@ -174,16 +178,16 @@ def build_item i, target=nil, doc=@map if i.map d = doc.dep_aliases[i.map].document a = d.imported_aliases[i.name] - raise ArgumentError, "Alias #{i.name} of #{i.stage.map} not found" unless a + raise Interscript::SystemConversionError, "Alias #{i.name} of #{i.stage.map} not found" unless a build_item(a.data, target, d) elsif Interscript::Stdlib::ALIASES.include?(i.name) if target != :re && Interscript::Stdlib.re_only_alias?(i.name) - raise ArgumentError, "Can't use #{i.name} in a #{target} context" + raise 
Interscript::SystemConversionError, "Can't use #{i.name} in a #{target} context" end Interscript::Stdlib::ALIASES[i.name] else a = doc.imported_aliases[i.name] - raise ArgumentError, "Alias #{i.name} not found" unless a + raise Interscript::SystemConversionError, "Alias #{i.name} not found" unless a build_item(a.data, target, doc) end when Interscript::Node::Item::String @@ -204,7 +208,7 @@ def build_item i, target=nil, doc=@map end when Interscript::Node::Item::CaptureGroup if target == :par - raise ArgumentError, "Can't use a CaptureGroup in a #{target} context" + raise Interscript::SystemConversionError, "Can't use a CaptureGroup in a #{target} context" end "(" + build_item(i.data, target, doc) + ")" when Interscript::Node::Item::Maybe, @@ -216,7 +220,7 @@ def build_item i, target=nil, doc=@map Interscript::Node::Item::MaybeSome => "*" }[i.class] if target == :par - raise ArgumentError, "Can't use a MaybeSome in a #{target} context" + raise Interscript::SystemConversionError, "Can't use a MaybeSome in a #{target} context" end if Interscript::Node::Item::String === i.data && i.data.data.length != 1 "(?:" + build_item(i.data, target, doc) + ")" + resuffix @@ -225,13 +229,13 @@ def build_item i, target=nil, doc=@map end when Interscript::Node::Item::CaptureRef if target == :par - raise ArgumentError, "Can't use CaptureRef in parallel mode" + raise Interscript::SystemConversionError, "Can't use CaptureRef in parallel mode" end "\\#{i.id}" when Interscript::Node::Item::Any if target == :str # We may never reach this point - raise ArgumentError, "Can't use Any in a string context" + raise Interscript::SystemConversionError, "Can't use Any in a string context" elsif target == :par i.data.map(&:data) elsif target == :re diff --git a/ruby/lib/interscript/node.rb b/lib/interscript/node.rb similarity index 91% rename from ruby/lib/interscript/node.rb rename to lib/interscript/node.rb index eb7e6a6f..bb8569c8 100644 --- a/ruby/lib/interscript/node.rb +++ 
b/lib/interscript/node.rb @@ -4,6 +4,10 @@ def initialize raise NotImplementedError, "You can't construct a Node directly" end + def ==(other) + self.class == other.class + end + def to_hash { :class => self.class.to_s, :question => "is something missing?" diff --git a/ruby/lib/interscript/node/alias_def.rb b/lib/interscript/node/alias_def.rb similarity index 77% rename from ruby/lib/interscript/node/alias_def.rb rename to lib/interscript/node/alias_def.rb index 36b21b32..48eb1912 100644 --- a/ruby/lib/interscript/node/alias_def.rb +++ b/lib/interscript/node/alias_def.rb @@ -7,6 +7,12 @@ def initialize(name, data) @data = data end + def ==(other) + super && + self.name == other.name && + self.data == other.data + end + def to_hash { :class => self.class.to_s, :name => @name, diff --git a/lib/interscript/node/dependency.rb b/lib/interscript/node/dependency.rb new file mode 100644 index 00000000..1afbb26d --- /dev/null +++ b/lib/interscript/node/dependency.rb @@ -0,0 +1,29 @@ +class Interscript::Node::Dependency < Interscript::Node + attr_accessor :name, :full_name, :import, :document + + def initialize + end + + def reverse + rdep = self.class.new + rdep.name = name + rdep.full_name = Interscript::Node::Document.reverse_name(full_name) + rdep.import = import + rdep.document = document&.reverse + rdep + end + + def ==(other) + super && + self.full_name == other.full_name && + self.import == other.import && + self.name == other.name + end + + def to_hash + { :class => self.class.to_s, + :name => @name, + :full_name => @full_name, + :import => @import } + end +end diff --git a/ruby/lib/interscript/node/document.rb b/lib/interscript/node/document.rb similarity index 55% rename from ruby/lib/interscript/node/document.rb rename to lib/interscript/node/document.rb index 9a24780e..2eea64e1 100644 --- a/ruby/lib/interscript/node/document.rb +++ b/lib/interscript/node/document.rb @@ -1,6 +1,7 @@ class Interscript::Node::Document attr_accessor :metadata, :tests, :name 
attr_accessor :dependencies, :aliases, :stages, :dep_aliases + attr_accessor :reversed_from def initialize puts "Interscript::Node::Document.new " if $DEBUG @@ -34,6 +35,39 @@ def all_dependencies end end + def reverse + @reverse ||= self.class.new.tap do |rdoc| + rdoc.name = self.class.reverse_name(name) + rdoc.metadata = metadata&.reverse + rdoc.tests = tests&.reverse + rdoc.dependencies = dependencies.map(&:reverse) + rdoc.stages = stages.transform_values(&:reverse) + rdoc.dep_aliases = dep_aliases.transform_values(&:reverse) + rdoc.aliases = aliases + end + end + + def self.reverse_name(name) + newname = (name || "noname").split("-") + newname[2], newname[3] = newname[3], newname[2] if newname.length >= 4 + newname = newname.join("-") + if newname == name + newname.gsub!("-reverse", "") + end + if newname == name + newname += "-reverse" + end + newname + end + + def ==(other) + self.class == other.class && + self.metadata == other.metadata && + self.tests == other.tests && + self.stages == other.stages && + self.aliases == other.aliases + end + def to_hash { :class => self.class.to_s, :metadata => @metadata&.to_hash, :tests => @tests&.to_hash, diff --git a/ruby/lib/interscript/node/group.rb b/lib/interscript/node/group.rb similarity index 66% rename from ruby/lib/interscript/node/group.rb rename to lib/interscript/node/group.rb index d3744d42..0b86212c 100644 --- a/ruby/lib/interscript/node/group.rb +++ b/lib/interscript/node/group.rb @@ -1,7 +1,8 @@ class Interscript::Node::Group < Interscript::Node - attr_accessor :children + attr_accessor :children, :reverse_run - def initialize + def initialize(reverse_run: nil) + @reverse_run = reverse_run @children = [] end @@ -20,11 +21,21 @@ def apply_order(order) self end + def reverse + self.class.new(reverse_run: reverse_run.nil? ? 
nil : !reverse_run).tap do |r| + r.children = self.children.reverse.map(&:reverse) + end + end + def to_hash { :class => self.class.to_s, :children => @children.map{|x| x.to_hash} } end + def ==(other) + super && self.children == other.children && self.reverse_run == other.reverse_run + end + def inspect @children.map(&:inspect).join("\n").gsub(/^/, " ") end diff --git a/ruby/lib/interscript/node/group/parallel.rb b/lib/interscript/node/group/parallel.rb similarity index 100% rename from ruby/lib/interscript/node/group/parallel.rb rename to lib/interscript/node/group/parallel.rb diff --git a/ruby/lib/interscript/node/group/sequential.rb b/lib/interscript/node/group/sequential.rb similarity index 100% rename from ruby/lib/interscript/node/group/sequential.rb rename to lib/interscript/node/group/sequential.rb diff --git a/ruby/lib/interscript/node/item.rb b/lib/interscript/node/item.rb similarity index 89% rename from ruby/lib/interscript/node/item.rb rename to lib/interscript/node/item.rb index b0be14e1..fe2e81de 100644 --- a/ruby/lib/interscript/node/item.rb +++ b/lib/interscript/node/item.rb @@ -36,9 +36,13 @@ def to_hash :item => self.item } end + def ==(other) + super + end + def self.try_convert(i) i = Interscript::Node::Item::String.new(i) if i.class == ::String - raise TypeError, "Wrong type #{i.class}, expected I::Node::Item" unless Interscript::Node::Item === i + raise Interscript::MapLogicError, "Wrong type #{i.class}, expected I::Node::Item" unless Interscript::Node::Item === i i end end diff --git a/ruby/lib/interscript/node/item/alias.rb b/lib/interscript/node/item/alias.rb similarity index 75% rename from ruby/lib/interscript/node/item/alias.rb rename to lib/interscript/node/item/alias.rb index 8b2c04a1..cfd4a58d 100644 --- a/ruby/lib/interscript/node/item/alias.rb +++ b/lib/interscript/node/item/alias.rb @@ -10,6 +10,10 @@ def stdlib? !map && Interscript::Stdlib::ALIASES.has_key?(name) end + def boundary_like? 
+ Interscript::Stdlib.boundary_like_alias?(name) + end + def max_length if stdlib? ([:none].include? name) ? 0 : 1 @@ -19,6 +23,10 @@ def max_length end end + # Not implemented properly + def downcase; self; end + def upcase; self; end + def first_string self end @@ -32,6 +40,10 @@ def to_hash } end + def ==(other) + super && self.name == other.name && self.map == other.map + end + def inspect if map "map.#{map}.#{name}" diff --git a/ruby/lib/interscript/node/item/any.rb b/lib/interscript/node/item/any.rb similarity index 85% rename from ruby/lib/interscript/node/item/any.rb rename to lib/interscript/node/item/any.rb index e728f933..6e5a786f 100644 --- a/ruby/lib/interscript/node/item/any.rb +++ b/lib/interscript/node/item/any.rb @@ -10,7 +10,7 @@ def initialize data self.value = Interscript::Stdlib::ALIASES[data.name] else puts data.inspect - raise TypeError, "Wrong type #{data[0].class}, excepted Array, String or Range" + raise Interscript::MapLogicError, "Wrong type #{data[0].class}, excepted Array, String or Range" end end @@ -25,6 +25,9 @@ def data end end + def downcase; self.class.new(self.data.map(&:downcase)); end + def upcase; self.class.new(self.data.map(&:upcase)); end + def first_string case @value when Array @@ -70,6 +73,10 @@ def to_hash hash end + def ==(other) + super && self.data == other.data + end + def inspect "any(#{value.inspect})" end diff --git a/ruby/lib/interscript/node/item/capture.rb b/lib/interscript/node/item/capture.rb similarity index 75% rename from ruby/lib/interscript/node/item/capture.rb rename to lib/interscript/node/item/capture.rb index 81cea131..7e9502ea 100644 --- a/ruby/lib/interscript/node/item/capture.rb +++ b/lib/interscript/node/item/capture.rb @@ -15,11 +15,18 @@ def nth_string data.nth_string end + def downcase; self.dup.tap { |i| i.data = i.data.downcase }; end + def upcase; self.dup.tap { |i| i.data = i.data.upcase }; end + def to_hash { :class => self.class.to_s, :data => self.data.to_hash } end + def ==(other) + 
super && self.data == other.data + end + def inspect "capture(#{@data.inspect})" end @@ -44,6 +51,10 @@ def to_hash :id => self.id } end + def ==(other) + super && self.id == other.id + end + def inspect "ref(#{@id.inspect})" end diff --git a/ruby/lib/interscript/node/item/group.rb b/lib/interscript/node/item/group.rb similarity index 53% rename from ruby/lib/interscript/node/item/group.rb rename to lib/interscript/node/item/group.rb index f538e364..b503efa1 100644 --- a/ruby/lib/interscript/node/item/group.rb +++ b/lib/interscript/node/item/group.rb @@ -10,11 +10,35 @@ def initialize *children def +(item) item = Interscript::Node::Item.try_convert(item) out = self.dup - out.children << item + if Interscript::Node::Item::Group === item + out.children += item.children + else + out.children << item + end out.verify! out end + def compact + out = self.dup do |n| + n.children = n.children.reject do |i| + (Interscript::Node::Alias === i && i.name == :none) || + (Interscript::Node::String === i && i.data == "") + end + end + + if out.children.count == 0 + Interscript::Node::Alias.new(:none) + elsif out.children.count == 1 + out.children.first + else + out + end + end + + def downcase; self.dup.tap { |i| i.children = i.children.map(&:downcase) }; end + def upcase; self.dup.tap { |i| i.children = i.children.map(&:upcase) }; end + # Verify if a group is valid def verify! wrong = @children.find do |i| @@ -24,7 +48,7 @@ def verify! end if wrong - raise TypeError, "An I::Node::Item::Group can't contain an #{wrong.class} item." + raise Interscript::MapLogicError, "An I::Node::Item::Group can't contain an #{wrong.class} item." 
end end @@ -45,6 +69,10 @@ def to_hash :children => self.children.map{|x| x.to_hash} } end + def ==(other) + super && self.children == other.children + end + def inspect @children.map(&:inspect).join("+") end diff --git a/ruby/lib/interscript/node/item/repeat.rb b/lib/interscript/node/item/repeat.rb similarity index 93% rename from ruby/lib/interscript/node/item/repeat.rb rename to lib/interscript/node/item/repeat.rb index ff7a98e1..a0f21570 100644 --- a/ruby/lib/interscript/node/item/repeat.rb +++ b/lib/interscript/node/item/repeat.rb @@ -22,6 +22,10 @@ def to_hash :data => self.data.to_hash } end + def ==(other) + super && self.data == other.data + end + def inspect str = case self when Interscript::Node::Item::Maybe diff --git a/ruby/lib/interscript/node/item/stage.rb b/lib/interscript/node/item/stage.rb similarity index 82% rename from ruby/lib/interscript/node/item/stage.rb rename to lib/interscript/node/item/stage.rb index 4c0036e2..a4c95de7 100644 --- a/ruby/lib/interscript/node/item/stage.rb +++ b/lib/interscript/node/item/stage.rb @@ -13,6 +13,10 @@ def to_hash } end + def ==(other) + super && self.name == other.name && self.map == other.map + end + def inspect if map "map.#{@map}.stage.#{@name}" diff --git a/ruby/lib/interscript/node/item/string.rb b/lib/interscript/node/item/string.rb similarity index 83% rename from ruby/lib/interscript/node/item/string.rb rename to lib/interscript/node/item/string.rb index 9a9f9765..7c7853bb 100644 --- a/ruby/lib/interscript/node/item/string.rb +++ b/lib/interscript/node/item/string.rb @@ -17,6 +17,9 @@ def first_string self.data end + def downcase; self.dup.tap { |i| i.data = i.data.downcase }; end + def upcase; self.dup.tap { |i| i.data = i.data.upcase }; end + alias nth_string first_string def + other @@ -33,6 +36,10 @@ def + other end end + def ==(other) + super && self.data == other.data + end + def inspect @data.inspect end diff --git a/ruby/lib/interscript/node/metadata.rb b/lib/interscript/node/metadata.rb 
similarity index 53% rename from ruby/lib/interscript/node/metadata.rb rename to lib/interscript/node/metadata.rb index 3c651e49..28b4f7e3 100644 --- a/ruby/lib/interscript/node/metadata.rb +++ b/lib/interscript/node/metadata.rb @@ -11,6 +11,16 @@ def [](k) @data[k] end + def reverse + self.class.new(data.dup, **{}).tap do |rmd| + rmd[:source_script], rmd[:destination_script] = rmd[:destination_script], rmd[:source_script] + end + end + + def ==(other) + super && self.data == other.data + end + def to_hash {:class => self.class.to_s, :data => @data} diff --git a/ruby/lib/interscript/node/rule.rb b/lib/interscript/node/rule.rb similarity index 69% rename from ruby/lib/interscript/node/rule.rb rename to lib/interscript/node/rule.rb index fefd517e..fb0b15b7 100644 --- a/ruby/lib/interscript/node/rule.rb +++ b/lib/interscript/node/rule.rb @@ -1,4 +1,7 @@ class Interscript::Node::Rule < Interscript::Node + def ==(other) + super && self.reverse_run == other.reverse_run + end end require "interscript/node/rule/sub" diff --git a/lib/interscript/node/rule/funcall.rb b/lib/interscript/node/rule/funcall.rb new file mode 100644 index 00000000..e6a54874 --- /dev/null +++ b/lib/interscript/node/rule/funcall.rb @@ -0,0 +1,28 @@ +class Interscript::Node::Rule::Funcall < Interscript::Node::Rule + attr_accessor :name, :kwargs, :reverse_run + def initialize name, reverse_run: nil, **kwargs + @name = name + @reverse_run = reverse_run + @kwargs = kwargs + end + + def to_hash + { :class => self.class.to_s, + :name => self.name, + :kwargs => self.kwargs + } + end + + def reverse + self.class.new(Interscript::Stdlib.reverse_function[@name.to_sym], + reverse_run: reverse_run.nil? ? 
nil : !reverse_run, **kwargs) + end + + def == + super && self.name == other.name && self.kwargs == other.kwargs + end + + def inspect + "#{@name} #{kwargs.inspect[1..-2]}" + end +end diff --git a/lib/interscript/node/rule/run.rb b/lib/interscript/node/rule/run.rb new file mode 100644 index 00000000..78644c07 --- /dev/null +++ b/lib/interscript/node/rule/run.rb @@ -0,0 +1,28 @@ +class Interscript::Node::Rule::Run < Interscript::Node::Rule + attr_accessor :stage, :reverse_run + def initialize stage, reverse_run: nil + @stage = stage + @reverse_run = reverse_run + end + + def to_hash + { :class => self.class.to_s, + :stage => self.stage.to_hash } + end + + def reverse + Interscript::Node::Rule::Run.new(stage, + reverse_run: reverse_run.nil? ? nil : !reverse_run + ) + end + + def ==(other) + super && self.stage == other.stage + end + + def inspect + out = "run #{@stage.inspect}" + out += ", reverse_run: #{@reverse_run.inspect}" unless reverse_run.nil? + out + end +end diff --git a/lib/interscript/node/rule/sub.rb b/lib/interscript/node/rule/sub.rb new file mode 100644 index 00000000..7086e456 --- /dev/null +++ b/lib/interscript/node/rule/sub.rb @@ -0,0 +1,229 @@ +class Interscript::Node::Rule::Sub < Interscript::Node::Rule + attr_accessor :from, :to + attr_accessor :before, :not_before, :after, :not_after + attr_accessor :reverse_before, :reverse_not_before, :reverse_after, :reverse_not_after + attr_accessor :reverse_run + attr_accessor :priority + + def initialize (from, to, + before: nil, not_before: nil, + after: nil, not_after: nil, + priority: nil, reverse_run: nil) + self.from = Interscript::Node::Item.try_convert from + if to == :upcase + self.to = :upcase + elsif to == :downcase + self.to = :downcase + else + self.to = Interscript::Node::Item.try_convert to + end + + self.priority = priority + + #raise TypeError, "Can't supply both before and not_before" if before && not_before + #raise TypeError, "Can't supply both after and not_after" if after && not_after + 
+ self.reverse_run = reverse_run + + self.before = Interscript::Node::Item.try_convert(before) if before + self.after = Interscript::Node::Item.try_convert(after) if after + self.not_before = Interscript::Node::Item.try_convert(not_before) if not_before + self.not_after = Interscript::Node::Item.try_convert(not_after) if not_after + end + + def max_length + len = self.from.max_length + len += self.before.max_length if self.before + len += self.after.max_length if self.after + len += self.not_before.max_length if self.not_before + len += self.not_after.max_length if self.not_after + len += self.priority if self.priority + len + end + + def to_hash + puts self.from.inspect if $DEBUG + puts params.inspect if $DEBUG + hash = { :class => self.class.to_s, + :from => self.from.to_hash, + :to => Symbol === self.to ? self.to : self.to.to_hash, + :reverse_run => self.reverse_run, + :before => self.before&.to_hash, + :not_before => self.not_before&.to_hash, + :after => self.after&.to_hash, + :not_after => self.not_after&.to_hash, + :priority => self.priority + } + + hash[:before] = self.before&.to_hash if self.before + hash[:not_before] = self.not_before&.to_hash if self.not_before + hash[:after] = self.after&.to_hash if self.after + hash[:not_after] = self.not_after&.to_hash if self.not_after + hash[:priority] = self.priority if self.priority + + hash + end + + def reverse + if to == :upcase + xfrom = from.downcase + xto = :downcase + elsif to == :downcase + xfrom = from.upcase + xto = :upcase + else + xto, xfrom = reverse_transfer(from, to) + end + + # A special case: sub "a", "" shouldn't be present in a reverse map + rrun = self.reverse_run.nil? ? nil : !self.reverse_run + if rrun.nil? && !has_assertions? 
&& + (xfrom == "" || + (Interscript::Node::Item::String === xfrom && xfrom.data == '') || + (Interscript::Node::Item::Alias === xfrom && xfrom.name == :none) + ) + + rrun = true + end + + Interscript::Node::Rule::Sub.new(xfrom, xto, + before: before, after: after, + not_before: not_before, not_after: not_after, + + reverse_run: rrun, + + priority: priority ? -priority : nil + ) + end + + def has_assertions? + !!(before || not_before || not_after || after) + end + + # Attempt to transfer some references to boundary/line_begin around. + # Those in general should go into before/after clauses, but for now + # let's try to get the best compatibility possible. Also, CaptureGroup, + # CaptureRef need to be shifted around + def reverse_transfer from, to + # This part is about moving initial and final boundary like aliases + case from + when Interscript::Node::Item::Group + first = from.children.first + last = from.children.last + + if Interscript::Node::Item::Alias === first && first.boundary_like? + out = Interscript::Node::Item::Group.new + first + to + to = out.compact + + from = from.dup.tap do |i| + i.children = i.children[1..-1] + end.compact + end + + if Interscript::Node::Item::Alias === last && last.boundary_like? + out = Interscript::Node::Item::Group.new + to + last + to = out.compact + + from = from.dup.tap do |i| + i.children = i.children[0..-2] + end.compact + end + when Interscript::Node::Item::Alias + if from.boundary_like? + to = if from.name.to_s.end_with? 
"_end" + Interscript::Node::Item::Group.new + to + from + else + Interscript::Node::Item::Group.new + from + to + end + from = Interscript::Node::Item::Alias.new(:none) + end + end + + # This part is about moving backreferences + state = {left:[], right:[]} + + from = reverse_transfer_visit(from, :from, state) + to = reverse_transfer_visit(to, :to, state) + + [from, to] + end + + private def reverse_transfer_visit(node, type, state) + node = Interscript::Node::Item.try_convert(node) + + case node + when Interscript::Node::Item::Alias + if node.name == :kor_maybedash + state[:left] << node + Interscript::Node::Item::CaptureRef.new(state[:left].length) + else + node + end + when Interscript::Node::Item::String + node + when Interscript::Node::Item::Any + if Array === node.value + node.dup.tap do |i| + i.value = i.value.map { |c| reverse_transfer_visit(c, type, state) } + end + else + node + end + when Interscript::Node::Item::Group + node.dup.tap do |i| + i.children = i.children.map { |c| reverse_transfer_visit(c, type, state) } + end + when Interscript::Node::Item::Repeat + node.dup.tap do |i| + i.data = reverse_transfer_visit(i.data, type, state) + end + when Interscript::Node::Item::CaptureRef + if type == :from + node + elsif state[:right][node.id] + node + else + state[:right][node.id] = true + state[:left][node.id - 1] or raise Interscript::MapLogicError, "Capture count doesn't match" + end + when Interscript::Node::Item::CaptureGroup + state[:left] << node + out = Interscript::Node::Item::CaptureRef.new(state[:left].length) + reverse_transfer_visit(node.data, type, state) # Visit but don't care + out + else + raise "Type #{node.class} unhandled!" 
+ end + end + + def ==(other) + super && + self.from == other.from && + self.to == other.to && + self.before == other.before && + self.after == other.after && + self.not_before == other.not_before && + self.not_after == other.not_after && + self.priority == other.priority + end + + def inspect + out = "sub " + params = [] + params << @from.inspect + if Symbol === @to + params << @to.to_s + else + params << @to.inspect + end + params << "reverse_run: #{@reverse_run.inspect}" unless @reverse_run.nil? + + params << "before: #{@before.inspect}" if @before + params << "after: #{@after.inspect}" if @after + params << "not_before: #{@not_before.inspect}" if @not_before + params << "not_after: #{@not_after.inspect}" if @not_after + + params << "priority: #{@priority.inspect}" if @priority + out << params.join(", ") + end +end diff --git a/lib/interscript/node/stage.rb b/lib/interscript/node/stage.rb new file mode 100644 index 00000000..a61ae865 --- /dev/null +++ b/lib/interscript/node/stage.rb @@ -0,0 +1,45 @@ +class Interscript::Node::Stage < Interscript::Node::Group::Sequential + attr_accessor :name, :doc_name, :dont_reverse + + def initialize(name = :main, reverse_run: nil, doc_name: nil, dont_reverse: false) + @name = name + @doc_name = doc_name + @dont_reverse = dont_reverse + super(reverse_run: reverse_run) + end + + def to_hash + { :class => self.class.to_s, + :name => name, + :children => @children.map{|x| x.to_hash} } + end + + def reverse + return self if dont_reverse + + @reverse ||= begin + self.class.new(name, + doc_name: Interscript::Node::Document.reverse_name(doc_name), + reverse_run: reverse_run.nil? ? 
nil : !reverse_run + ).tap do |r| + r.children = self.children.reverse.map(&:reverse) + end + end + end + + def ==(other) + super && + self.name == other.name && + self.reverse_run == other.reverse_run && + self.dont_reverse == other.dont_reverse + end + + def inspect + args = [] + args << "#{@name}" if @name != :main + args << "dont_reverse: true" if dont_reverse + name = "" + name = "(#{args.join(", ")})" unless args.empty? + "stage#{name} {\n#{super}\n}" + end +end diff --git a/ruby/lib/interscript/node/tests.rb b/lib/interscript/node/tests.rb similarity index 54% rename from ruby/lib/interscript/node/tests.rb rename to lib/interscript/node/tests.rb index ab08ad87..f440cd98 100644 --- a/ruby/lib/interscript/node/tests.rb +++ b/lib/interscript/node/tests.rb @@ -8,6 +8,16 @@ def <<(pair) @data << pair end + def reverse + self.class.new(data.map do |from,to,reverse_run| + [to, from, reverse_run == nil ? nil : !reverse_run] + end) + end + + def ==(other) + super && self.data == other.data + end + def to_hash { :class => self.class.to_s, :data => @data } diff --git a/ruby/lib/interscript/stdlib.rb b/lib/interscript/stdlib.rb similarity index 69% rename from ruby/lib/interscript/stdlib.rb rename to lib/interscript/stdlib.rb index 2acbe8af..2f9100d7 100644 --- a/ruby/lib/interscript/stdlib.rb +++ b/lib/interscript/stdlib.rb @@ -22,6 +22,10 @@ def self.re_only_alias?(a) ! 
%i[none space].include?(a) end + def self.boundary_like_alias?(a) + %i[line_start line_end string_start string_end boundary non_word_boundary].include?(a) + end + @treecache = {} def self.parallel_regexp_compile(subs_hash) @@ -167,7 +171,23 @@ def self.deterministic_sort_by_max_length(ary) end def self.available_functions - %i[title_case downcase compose decompose separate secryst] + %i[title_case downcase compose decompose separate unseparate secryst rababa rababa_reverse] + end + + def self.reverse_function + { + title_case: :downcase, # Those two are best-effort, + downcase: :title_case, # but probably wrong. + + compose: :decompose, + decompose: :compose, + + separate: :unseparate, + unseparate: :separate, + + rababa: :rababa_reverse, + rababa_reverse: :rababa, + } end module Functions @@ -177,8 +197,13 @@ def self.title_case(output, word_separator: " ") output end - def self.downcase(output, _:nil) - output.downcase + def self.downcase(output, word_separator: nil) + if word_separator + output = output.gsub(/^(.)/, &:downcase) + word_separator == '' ? output : output.gsub(/#{word_separator}(.)/, &:downcase) # keep returning the String when the separator is empty + else + output.downcase + end end def self.compose(output, _:nil) @@ -193,11 +218,15 @@ def self.separate(output, separator: " ") output.split("").join(separator) end + def self.unseparate(output, separator: " ") + output.split(separator).join("") + end + @secryst_models = {} def self.secryst(output, model:) require "secryst" rescue nil # Try to load secryst, but don't fail hard if not possible. unless defined? Secryst - raise StandardError, "Secryst is not loaded. Please read docs/Usage_with_Secryst.adoc" + raise Interscript::ExternalUtilError, "Secryst is not loaded.
Please read docs/Usage_with_Secryst.adoc" end Interscript.secryst_index_locations.each do |remote| Secryst::Provisioning.add_remote(remote) @@ -207,5 +236,34 @@ def self.secryst(output, model:) @secryst_models[model].translate(i) end.join("\n") end + + def self.rababa(output, config:) + require "rababa" rescue nil # Try to load rababa, but don't fail hard if not possible. + unless defined? Rababa + raise Interscript::ExternalUtilError, "Rababa is not loaded. Please read docs/Usage_with_Rababa.adoc" + end + + config_value = Interscript.rababa_configs[config] + model_uri = config_value['model'] + rababa_config = config_value['config'] + model_path = Interscript.rababa_provision(config, model_uri) + + @rababa_diacritizer ||= Rababa::Diacritizer.new(model_path, rababa_config) + + @rababa_diacritizer.diacritize_text(output) + end + + def self.rababa_reverse(output, config:) + # require "rababa" rescue nil # Try to load rababa, but don't fail hard if not possible. + # unless defined? Rababa + # raise StandardError, "Rababa is not loaded. Please read docs/Usage_with_Rababa.adoc" + # end + + # A call to allocate allows us to remove diacritics without initializing the model + # Rababa::Diacritizer.allocate.remove_diacritics(output) + + # Unfortunately, this is broken as of now. 
+ output.gsub(/[\u064e\u064b\u064f\u064c\u0650\u064d\u0652\u0651]/, '') + end end end diff --git a/lib/interscript/utils/helpers.rb b/lib/interscript/utils/helpers.rb new file mode 100644 index 00000000..71999d6c --- /dev/null +++ b/lib/interscript/utils/helpers.rb @@ -0,0 +1,39 @@ +module Interscript::Utils + module Helpers + def document name=nil, &block + $example_id ||= 0 + $example_id += 1 + name ||= "example-#{$example_id}" + + Interscript::DSL::Document.new(name, &block).node.tap do |i| + $documents ||= {} + $documents[name] = i + end + end + + def stage &block + document { + stage(&block) + } + end + end +end + +class Interscript::Node::Document + def call(str, stage=:main, compiler=$compiler || Interscript::Interpreter, **kwargs) + compiler.(self).(str, stage, **kwargs) + end +end + +module Interscript::DSL + class << self + alias original_parse parse + def parse(map_name, **kwargs) + if $documents && $documents[map_name] + $documents[map_name] + else + original_parse(map_name, **kwargs) + end + end + end +end \ No newline at end of file diff --git a/ruby/lib/interscript/utils/regexp_converter.rb b/lib/interscript/utils/regexp_converter.rb similarity index 100% rename from ruby/lib/interscript/utils/regexp_converter.rb rename to lib/interscript/utils/regexp_converter.rb diff --git a/ruby/lib/interscript/version.rb b/lib/interscript/version.rb similarity index 53% rename from ruby/lib/interscript/version.rb rename to lib/interscript/version.rb index 1889ce23..72eeefb3 100644 --- a/ruby/lib/interscript/version.rb +++ b/lib/interscript/version.rb @@ -1,3 +1,3 @@ module Interscript - VERSION = "2.1.0" + VERSION = "2.4.5" end diff --git a/ruby/lib/interscript/visualize.rb b/lib/interscript/visualize.rb similarity index 100% rename from ruby/lib/interscript/visualize.rb rename to lib/interscript/visualize.rb diff --git a/ruby/lib/interscript/visualize/group.html.erb b/lib/interscript/visualize/group.html.erb similarity index 100% rename from 
ruby/lib/interscript/visualize/group.html.erb rename to lib/interscript/visualize/group.html.erb diff --git a/ruby/lib/interscript/visualize/json.rb b/lib/interscript/visualize/json.rb similarity index 75% rename from ruby/lib/interscript/visualize/json.rb rename to lib/interscript/visualize/json.rb index 93006411..e995b1cd 100644 --- a/ruby/lib/interscript/visualize/json.rb +++ b/lib/interscript/visualize/json.rb @@ -10,6 +10,7 @@ def to_visualization_array(map=self) more << "after: #{rule.after.to_html(map)}" if rule.after more << "not before: #{rule.not_before.to_html(map)}" if rule.not_before more << "not after: #{rule.not_after.to_html(map)}" if rule.not_after + more << "reverse run: #{rule.reverse_run}" unless rule.reverse_run.nil? more = more.join(", ") out << { @@ -24,11 +25,14 @@ def to_visualization_array(map=self) children: rule.to_visualization_array(map) } when Interscript::Node::Rule::Funcall + more = rule.kwargs.map do |k,v| + "#{k.to_s.gsub("_", " ")}: #{v}" + end + more << "reverse run: #{rule.reverse_run}" unless rule.reverse_run.nil? + out << { type: rule.name.to_s.gsub("_", " ").gsub(/^(.)/, &:upcase), - more: rule.kwargs.map do |k,v| - "#{k.to_s.gsub("_", " ")}: #{v}" - end.join(", ") + more: more.join(", ") } when Interscript::Node::Rule::Run if rule.stage.map @@ -39,10 +43,14 @@ def to_visualization_array(map=self) stage = rule.stage.name end + more = [] + more << "reverse run: #{rule.reverse_run}" unless rule.reverse_run.nil? + out << { type: "Run", doc: doc.name, - stage: stage + stage: stage, + more: more.join(", "), } else out << { diff --git a/ruby/lib/interscript/visualize/map.html.erb b/lib/interscript/visualize/map.html.erb similarity index 87% rename from ruby/lib/interscript/visualize/map.html.erb rename to lib/interscript/visualize/map.html.erb index fcb8ac29..222ee8ad 100644 --- a/ruby/lib/interscript/visualize/map.html.erb +++ b/lib/interscript/visualize/map.html.erb @@ -35,7 +35,7 @@ <% case k when :url %> -
<%= h v %> +
<% v.each do |i| %><%= h i %><% if i != v.last %>; <% end %><% end %> <% when :notes, :implementation_notes, :special_rules, :original_notes, :original_description # We ignore notes for now %> <% else %>
<%= h v %> @@ -43,4 +43,4 @@ <% end %> -<%= render_stage(self.map.name, :main) %> \ No newline at end of file +<%= render_stage(self.map.name, :main) %> diff --git a/ruby/lib/interscript/visualize/nodes.rb b/lib/interscript/visualize/nodes.rb similarity index 100% rename from ruby/lib/interscript/visualize/nodes.rb rename to lib/interscript/visualize/nodes.rb diff --git a/maps b/maps deleted file mode 160000 index bdd7d700..00000000 --- a/maps +++ /dev/null @@ -1 +0,0 @@ -Subproject commit bdd7d7002ecb246fcfff3bf8514187dde170341c diff --git a/ruby/requirements.txt b/requirements.txt similarity index 100% rename from ruby/requirements.txt rename to requirements.txt diff --git a/ruby/.gitignore b/ruby/.gitignore deleted file mode 100644 index b04a8c84..00000000 --- a/ruby/.gitignore +++ /dev/null @@ -1,11 +0,0 @@ -/.bundle/ -/.yardoc -/_yardoc/ -/coverage/ -/doc/ -/pkg/ -/spec/reports/ -/tmp/ - -# rspec failure tracking -.rspec_status diff --git a/ruby/LICENSE.adoc b/ruby/LICENSE.adoc deleted file mode 100644 index 86b8593b..00000000 --- a/ruby/LICENSE.adoc +++ /dev/null @@ -1,31 +0,0 @@ -= Licenses & Copyright - -This license file adheres to the formatting guidelines of -https://github.com/nevir/readable-licenses[readable-licenses]. - - -== Ribose BSD 2-Clause License - -Copyright (c) 2019-, https://www.ribose.com[Ribose Inc]. -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/ruby/bin/console b/ruby/bin/console deleted file mode 100755 index e9f20ea8..00000000 --- a/ruby/bin/console +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env ruby - -require "bundler/setup" -require "interscript" - -# You can add fixtures and/or initialization code here to make experimenting -# with your gem easier. You can also use a different console, if you like. - -# (If you use this, don't forget to add pry to your Gemfile!) -# require "pry" -# Pry.start - -require "irb" -IRB.start(__FILE__) diff --git a/ruby/lib/interscript/command.rb b/ruby/lib/interscript/command.rb deleted file mode 100644 index 6457e5de..00000000 --- a/ruby/lib/interscript/command.rb +++ /dev/null @@ -1,28 +0,0 @@ -require 'thor' -require 'interscript' -require 'json' -module Interscript - # Command line interface - class Command < Thor - desc '', 'Transliterate text' - option :system, aliases: '-s', required: true, desc: 'Transliteration system' - option :output, aliases: '-o', required: false, desc: 'Output file' - # Was this option really well thought out? The last parameter is a cache, isn't it? 
- #option :map, aliases: '-m', required: false, default: "{}", desc: 'Transliteration mapping json' - - def translit(input) - if options[:output] - Interscript.transliterate_file(options[:system], input, options[:output]) #, JSON.parse(options[:map])) - else - puts Interscript.transliterate(options[:system], IO.read(input)) - end - end - - desc 'list', 'Prints allowed transliteration systems' - def list - Interscript.maps(load_path: true).each do |path| - puts path - end - end - end -end diff --git a/ruby/lib/interscript/dsl/group/parallel.rb b/ruby/lib/interscript/dsl/group/parallel.rb deleted file mode 100644 index f7df1692..00000000 --- a/ruby/lib/interscript/dsl/group/parallel.rb +++ /dev/null @@ -1,6 +0,0 @@ -class Interscript::DSL::Group::Parallel < Interscript::DSL::Group - def initialize(&block) - @node = Interscript::Node::Group::Parallel.new - self.instance_exec(&block) - end -end diff --git a/ruby/lib/interscript/node/dependency.rb b/ruby/lib/interscript/node/dependency.rb deleted file mode 100644 index c5a22918..00000000 --- a/ruby/lib/interscript/node/dependency.rb +++ /dev/null @@ -1,13 +0,0 @@ -class Interscript::Node::Dependency < Interscript::Node - attr_accessor :name, :full_name, :import, :document - - def initialize - end - - def to_hash - { :class => self.class.to_s, - :name => @name, - :full_name => @full_name, - :import => @import } - end -end diff --git a/ruby/lib/interscript/node/rule/funcall.rb b/ruby/lib/interscript/node/rule/funcall.rb deleted file mode 100644 index 287c7481..00000000 --- a/ruby/lib/interscript/node/rule/funcall.rb +++ /dev/null @@ -1,18 +0,0 @@ -class Interscript::Node::Rule::Funcall < Interscript::Node::Rule - attr_accessor :name, :kwargs - def initialize name, **kwargs - @name = name - @kwargs = kwargs - end - - def to_hash - { :class => self.class.to_s, - :name => self.name, - :kwargs => self.kwargs - } - end - - def inspect - "#{@name} #{kwargs.inspect[1..-2]}" - end -end diff --git 
a/ruby/lib/interscript/node/rule/run.rb b/ruby/lib/interscript/node/rule/run.rb deleted file mode 100644 index 774ccce9..00000000 --- a/ruby/lib/interscript/node/rule/run.rb +++ /dev/null @@ -1,15 +0,0 @@ -class Interscript::Node::Rule::Run < Interscript::Node::Rule - attr_accessor :stage - def initialize stage - @stage = stage - end - - def to_hash - { :class => self.class.to_s, - :stage => self.stage.to_hash } - end - - def inspect - "run #{@stage.inspect}" - end -end diff --git a/ruby/lib/interscript/node/rule/sub.rb b/ruby/lib/interscript/node/rule/sub.rb deleted file mode 100644 index fdf5aa3e..00000000 --- a/ruby/lib/interscript/node/rule/sub.rb +++ /dev/null @@ -1,68 +0,0 @@ -class Interscript::Node::Rule::Sub < Interscript::Node::Rule - attr_accessor :from, :to - attr_accessor :before, :not_before, :after, :not_after - attr_accessor :priority - - def initialize from, to, before: nil, not_before: nil, after: nil, not_after: nil, priority: nil - self.from = Interscript::Node::Item.try_convert from - if to == :upcase - self.to = :upcase - else - self.to = Interscript::Node::Item.try_convert to - end - - self.priority = priority - - #raise TypeError, "Can't supply both before and not_before" if before && not_before - #raise TypeError, "Can't supply both after and not_after" if after && not_after - - self.before = Interscript::Node::Item.try_convert(before) if before - self.after = Interscript::Node::Item.try_convert(after) if after - self.not_before = Interscript::Node::Item.try_convert(not_before) if not_before - self.not_after = Interscript::Node::Item.try_convert(not_after) if not_after - end - - def max_length - len = self.from.max_length - len += self.before.max_length if self.before - len += self.after.max_length if self.after - len += self.not_before.max_length if self.not_before - len += self.not_after.max_length if self.not_after - len += self.priority if self.priority - len - end - - def to_hash - puts self.from.inspect if $DEBUG - puts params.inspect 
if $DEBUG - hash = { :class => self.class.to_s, - :from => self.from.to_hash, - :to => Symbol === self.to ? self.to : self.to.to_hash - } - - hash[:before] = self.before&.to_hash if self.before - hash[:not_before] = self.not_before&.to_hash if self.not_before - hash[:after] = self.after&.to_hash if self.after - hash[:not_after] = self.not_after&.to_hash if self.not_after - hash[:priority] = self.priority if self.priority - - hash - end - - def inspect - out = "sub " - params = [] - params << @from.inspect - if @to == :upcase - params << "upcase" - else - params << @to.inspect - end - params << "before: #{@before.inspect}" if @before - params << "after: #{@after.inspect}" if @after - params << "not_before: #{@not_before.inspect}" if @not_before - params << "not_after: #{@not_after.inspect}" if @not_after - params << "priority: #{@priority.inspect}" if @priority - out << params.join(", ") - end -end diff --git a/ruby/lib/interscript/node/stage.rb b/ruby/lib/interscript/node/stage.rb deleted file mode 100644 index a08a9c02..00000000 --- a/ruby/lib/interscript/node/stage.rb +++ /dev/null @@ -1,19 +0,0 @@ -class Interscript::Node::Stage < Interscript::Node::Group::Sequential - attr_accessor :name, :doc_name - - def initialize name = :main - @name = name - super() - end - - def to_hash - { :class => self.class.to_s, - :name => name, - :children => @children.map{|x| x.to_hash} } - end - - def inspect - name = "(#{@name})" if @name != :main - "stage#{name} {\n#{super}\n}" - end -end diff --git a/spec/authority_codes.yaml b/spec/authority_codes.yaml new file mode 100644 index 00000000..d762bf73 --- /dev/null +++ b/spec/authority_codes.yaml @@ -0,0 +1,309 @@ +acadsin: + code: acadsin + name: + en: Academia Sinica +ahl: + code: ahl + name: + en: The Academy of the Hebrew Language +alalc: + code: alalc + name: + en: American Library Association -- Library of Congress +ammi: + code: ammi + name: + en: Afghanistan Ministry of Mines and Industries +ansi: + code: ansi + name: + 
en: American National Standards Institute +apcbg: + code: apcbg + name: + en: Antarctic Place-names Commission of Bulgaria +asm: + code: asm + name: + en: Academy of Sciences of Moldova +az: + code: az + name: + en: Azerbaijan Government +bas: + code: bas + name: + en: Bulgarian Academy of Sciences +bds: + code: bds + name: + en: Bulgarian Institute for Standardization +bgn: + code: bgn + name: + en: United States Board on Geographic Names +bgna: + code: bgna + name: + en: National Assembly of the Republic of Bulgaria +bgnpcgn: + code: bgnpcgn + name: + en: United States Board on Geographic Names -- Permanent Committee on Geographical + Names for British Official Use +bis: + code: bis + name: + en: Bureau of Indian Standards +biulo: + code: biulo + name: + en: Bibliothèque interuniversitaire des langues orientales +bsi: + code: bsi + name: + en: British Standards Institution +bt: + code: bt + name: + en: Royal Government of Bhutan +bulac: + code: bulac + name: + en: Bibliothèque universitaire des langues et civilisations +by: + code: by + name: + en: Government of Belarus +cn: + code: cn + name: + en: Government of China +cnt: + code: cnt + name: + en: Lao Commission Nationale de Toponymie +din: + code: din + name: + en: German Institute for Standardization +dmg: + code: dmg + name: + en: Deutsche Morgenländische Gesellschaft +dos: + code: dos + name: + en: Survey Department, Ministry of Land Management, Cooperatives and Poverty + Alleviation, Government of Nepal +easc: + code: easc + name: + en: Euro-Asian Council for Standardization, Metrology and Certification +efeo: + code: efeo + name: + en: École française d'Extrême-Orient +elot: + code: elot + name: + en: Hellenic Organization for Standardization +gaz: + code: gaz + name: + en: Azeri Government +ggg: + code: ggg + name: + en: Georgian State Department of Geodesy and Cartography +gki: + code: gki + name: + en: State Committee on Property of the Republic of Belarus +gost: + code: gost + name: + en: Rosstandart
+gsi: + code: gsi + name: + en: Geospatial Information Authority of Japan +hk: + code: hk + name: + en: Hong Kong Government +icao: + code: icao + name: + en: International Civil Aviation Organization +ign: + code: ign + name: + en: Institut Geographique Nationale +iso: + code: iso + name: + en: International Organization for Standardization +itk: + code: itk + name: + en: Inuit Tapiriit Kanatami +jp: + code: jp + name: + en: Government of Japan +jra: + code: jra + name: + en: Japan Road Association +kp: + code: kp + name: + en: Democratic People's Republic of Korea +lbmod: + code: lbmod + name: + en: Lebanese Republic Ministry of National Defense +lshk: + code: lshk + name: + en: Linguistic Society of Hong Kong +ma: + code: ma + name: + en: Kingdom of Morocco +md: + code: md + name: + en: Republic of Moldova +mext: + code: mext + name: + en: Ministry of Education, Culture, Sports, Science and Technology -- Japan +mk: + code: mk + name: + en: Republic of North Macedonia +mlc: + code: mlc + name: + en: Myanmar Language Commission +mlit: + code: mlit + name: + en: Ministry of Land, Infrastructure, Transport and Tourism of Japan +mlmupc: + code: mlmupc + name: + en: The Ministry of Land Management, Urban Planning and Construction of Cambodia +masm: + code: masm + name: + en: Mongolian Agency for Standardization and Metrology +moct: + code: moct + name: + en: Korean Ministry of Culture and Tourism +mofa: + code: mofa + name: + en: Ministry of Foreign Affairs of Japan +msst: + code: msst + name: + en: The Major State Service "Turkmenstandartlary" +mv: + code: mv + name: + en: Republic of Maldives +nco: + code: nco + name: + en: National Cartographic Center of Iran +nikl: + code: nikl + name: + en: National Institute of Korean Language +nrs: + code: nrs + name: + en: Nippon-no-Rômazi-Sya + notes: Also known as the Japan Romanization Society. 
+odni: + code: odni + name: + en: Office of the Director Of National Intelligence +rjgc: + code: rjgc + name: + en: Royal Jordanian Geographic Center +royin: + code: royin + name: + en: The Royal Society of Thailand + notes: Formerly named The Royal Institute of Thailand (royin) +rs: + code: rs + name: + en: Republic of Serbia +sac: + code: sac + name: + en: Standardization Administration of China +sasm: + code: sasm + name: + en: The former State Administration of Surveying and Mapping of the People's Republic of China +ses: + code: ses + name: + en: Survey of Egypt +sfs: + code: sfs + name: + en: Finnish Standards Association +sgk: + code: sgk + name: + en: Khmere Service Geographique +tm: + code: tm + name: + en: Republic of Turkmenistan +ua: + code: ua + name: + en: Government of Ukraine +ucis: + code: ucis + name: + en: Uyghur Computer Information Society +un: + code: un + name: + en: United Nations +uz: + code: uz + name: + en: Government of Uzbekistan +var: + code: var + name: + en: Various systems managed by ISO {docnumber}/AG +xlsc: + code: xlsc + name: + en: XUAR Language and Script Committee +yivo: + code: yivo + name: + en: YIVO Institute for Jewish Research +mvd: + code: mvd + name: + en: The Ministry of Internal Affairs of the Republic of Belarus + +stategeocadastre: + code: stategeocadastre + name: + en: State Service of Ukraine for Geodesy, Cartography and Cadastre (StateGeoCadastre) diff --git a/spec/composability_spec.rb b/spec/composability_spec.rb new file mode 100644 index 00000000..9e2d998b --- /dev/null +++ b/spec/composability_spec.rb @@ -0,0 +1,50 @@ +RSpec.describe "composability" do + it "can depend on reversed maps" do + a = document("part-1-One-Two") { + stage { + sub "a", "b" + } + } + + b = document("part-2-One-Two") { + stage { + sub "c", "d" + } + } + + c = document("composed") { + dependency "part-1-Two-One", as: twoone + dependency "part-2-One-Two", as: onetwo + + stage { + run map.twoone.stage.main + run map.onetwo.stage.main + } 
+ } + + expect(c.("abcd")).to eq("aadd") + end + + it "can seamlessly compose two maps" do + a = document("part1") { + stage { + sub "a", "b" + } + } + b = document("part2") { + stage { + sub "c", "d" + } + } + + c = document("composed2") { + dependency "part1|part2", as: composed + + stage { + run map.composed.stage.main + } + } + + expect(c.("abcd")).to eq("bbdd") + end +end \ No newline at end of file diff --git a/spec/detector_spec.rb b/spec/detector_spec.rb new file mode 100644 index 00000000..648635ab --- /dev/null +++ b/spec/detector_spec.rb @@ -0,0 +1,28 @@ +RSpec.describe Interscript::Detector do + it "should return valid data when map_pattern is selected and multiple is true" do + out = Interscript.detect( + "привет", "privet", + map_pattern: "icao-ukr-*", + multiple: true, + compiler: Interscript::Compiler::Ruby + ) + expected = {"icao-ukr-Cyrl-Latn-9303" => 1.0} + expect(out).to eq(expected) + end + + it "should return valid data when map_pattern isn't selected and multiple is false" do + out = Interscript.detect("привет", "privet", compiler: Interscript::Compiler::Ruby) + expect(out).to be_a(String) + end + + it "should return valid data when map_pattern isn't selected and multiple is true" do + out = Interscript.detect( + "привет", "privet", + multiple: true, + compiler: Interscript::Compiler::Ruby, + ) + expect(out).to be_a(Hash) + expect(out.keys.all? { |i| i.class == String }).to be true + expect(out.values.all? 
{ |i| Numeric === i }).to be true + end +end \ No newline at end of file diff --git a/ruby/spec/dsl_stage_spec.rb b/spec/dsl_stage_spec.rb similarity index 100% rename from ruby/spec/dsl_stage_spec.rb rename to spec/dsl_stage_spec.rb diff --git a/ruby/spec/interscript_spec.rb b/spec/interscript_spec.rb similarity index 56% rename from ruby/spec/interscript_spec.rb rename to spec/interscript_spec.rb index 5810be01..21e77afa 100644 --- a/ruby/spec/interscript_spec.rb +++ b/spec/interscript_spec.rb @@ -16,26 +16,43 @@ RSpec.describe Interscript do each_compiler do |compiler| + next if ENV["ONLY_COMPILER"] && compiler.name != ENV["ONLY_COMPILER"] + describe compiler do - maps.each do |system_file| + compiler_maps = Interscript.exclude_maps(maps, compiler: compiler) + + compiler_maps.each do |system_file| system_name = File.basename(system_file, ".imp") - context "#{system_name} system" do + if ENV["REVERSE"] + my_system_name = Interscript::Node::Document.reverse_name(system_name) + else + my_system_name = system_name + end + + context "#{my_system_name} system" do begin system = Interscript.parse(system_name) + system = system.reverse if ENV["REVERSE"] if system.tests && system.tests.data && system.tests.data.length > 0 - system.tests.data.each do |from,expected| + system.tests.data.each do |from,expected,reverse_run| + next if reverse_run == true + testname = from[0...300].gsub("\n", " / ") it "test for #{testname}" do - Timeout::timeout(5) do - result = Interscript.transliterate(system_name, from, cache, compiler: compiler) + # Allow a bigger timeout for Rababa so that model files + # can be provisioned. This is temporary until we find a + # better location for this code. + timeout = my_system_name =~ /rababa/ ? 
100 : 5 + Timeout::timeout(timeout) do + result = Interscript.transliterate(my_system_name, from, cache, compiler: compiler) expect(result).to eq(expected) end end end else it "can successfully run a dummy test" do - result = Interscript.transliterate(system_name, "", cache, compiler: compiler) + result = Interscript.transliterate(my_system_name, "", cache, compiler: compiler) expect(result).to eq("") end if ENV["REQUIRE_TESTS"] diff --git a/spec/map_name_and_metadata_spec.rb b/spec/map_name_and_metadata_spec.rb new file mode 100644 index 00000000..5d63ee03 --- /dev/null +++ b/spec/map_name_and_metadata_spec.rb @@ -0,0 +1,45 @@ +require "spec_helper" +require "iso-639-data" +require "iso-15924" + +RSpec.describe "map names and metadata" do + valid_authcodes = YAML.load(File.read(__dir__+"/authority_codes.yaml")).keys + + Interscript.maps.each do |n| + context n do + parts = n.split('-', 5) + authcode, lang, source_script, target_script, id = parts + map = Interscript.parse(n) + + it "has a valid name" do + expect(parts.count).to be 5 + expect(valid_authcodes).to include authcode + expect(Iso639Data.valid?(lang)).to be true + expect(Iso15924.valid?(source_script)).to be true + expect(Iso15924.valid?(target_script)).to be true + end + + it "has matching metadata" do + expect(map.metadata[:authority_id]).to eq authcode + expect(map.metadata[:source_script]).to eq source_script + expect(map.metadata[:destination_script]).to eq target_script + expect(map.metadata[:id]).to eq id + end + + it "has a correct language in the metadata" do + m_auth, m_lang = map.metadata[:language].split(':', 2) + + expect(lang).to eq m_lang + + case m_auth + when 'iso-639-2' + expect(Iso639Data.iso_639_2.key? m_lang).to be true + when 'iso-639-3' + expect(Iso639Data.iso_639_3.key? 
m_lang).to be true + else + raise "#{m_auth} is an invalid authority for #{lang} - iso-639-2 or 3 expected" + end + end + end + end +end diff --git a/spec/reversibility_spec.rb b/spec/reversibility_spec.rb new file mode 100644 index 00000000..06f47fce --- /dev/null +++ b/spec/reversibility_spec.rb @@ -0,0 +1,285 @@ +RSpec.describe "Reversibility" do + describe "stage tests" do + it "reverses a basic stage" do + a = stage { + sub "a", "b" + } + + b = stage { + sub "b", "a" + } + + expect(a.reverse).to eq(b) + end + + it "reverses a multirule stage" do + a = stage { + sub "a", "b" + sub "c", "d" + } + + b = stage { + sub "d", "c" + sub "b", "a" + } + + expect(a.reverse).to eq(b) + end + + it "reverses a multirule stage and preserves before/after" do + a = stage { + sub "a", "b", before: "c" + sub "c", "d", after: "d" + } + + b = stage { + sub "d", "c", after: "d" + sub "b", "a", before: "c" + } + + expect(a.reverse).to eq(b) + end + + it "reverses a multirule stage and preserves not before/not after" do + a = stage { + sub "a", "b", not_before: "c" + sub "c", "d", not_after: "d" + } + + b = stage { + sub "d", "c", not_after: "d" + sub "b", "a", not_before: "c" + } + + expect(a.reverse).to eq(b) + end + + it "reverses a parallel stage" do + a = stage { + parallel { + sub "a", "b" + sub "c", "d" + sub "e", "f" + } + } + + b = stage { + parallel { + sub "f", "e" + sub "d", "c" + sub "b", "a" + } + } + + expect(a.reverse).to eq(b) + end + + it "reverses a parallel stage and other rules if present" do + a = stage { + sub "X", "Y" + parallel { + sub "a", "b" + sub "c", "d" + sub "e", "f" + } + } + + b = stage { + parallel { + sub "f", "e" + sub "d", "c" + sub "b", "a" + } + sub "Y", "X" + } + + expect(a.reverse).to eq(b) + end + + it "reverses with reverse_run correctly" do + a = stage { + sub "X", "Y", reverse_run: true + parallel { + sub "a", "b", reverse_run: false + sub "c", "d" + sub "e", "f" + } + } + + b = stage { + parallel { + sub "f", "e" + sub "d", "c" + sub 
"b", "a", reverse_run: true + } + sub "Y", "X", reverse_run: false + } + + expect(a.reverse).to eq(b) + end + end + + describe "item tests" do + it "transforms boundary" do + a = stage { + sub "a"+boundary, "b" + } + + b = stage { + sub "b"+boundary, "a" + } + + expect(a.reverse).to eq(b) + end + + it "transforms captures and references" do + a = stage { + sub capture("a"), ref(1)+"b" + } + + b = stage { + sub capture("a")+"b", ref(1) + } + + expect(a.reverse).to eq(b) + end + + it "doesn't transform any" do + a = stage { + sub any("ab"), any("bc") + } + + b = stage { + sub any("bc"), any("ab") + } + + expect(a.reverse).to eq(b) + end + end + + describe "document transformations" do + it "transforms document name correctly when it transforms between different character sets" do + a = document("var-kor-Kore-Hang-test") { } + + expect(a.reverse.name).to eq("var-kor-Hang-Kore-test") + end + + it "transforms document name correctly when it transforms between the same character sets" do + a = document("var-swe-Latn-Latn-test") { } + + expect(a.reverse.name).to eq("var-swe-Latn-Latn-test-reverse") + end + + it "transforms input and output charset in metadata correctly" do + a = document {} + a.metadata = Interscript::Node::MetaData.new + a.metadata[:source_script] = "Hani" + a.metadata[:destination_script] = "Latn" + + b = a.reverse + expect(b.metadata[:source_script]).to eq("Latn") + expect(b.metadata[:destination_script]).to eq("Hani") + end + + it "transforms tests" do + a = document { + tests { + test "a", "b" + test "c", "d" + } + } + + b = document { + tests { + test "b", "a" + test "d", "c" + } + } + + expect(a.reverse).to eq(b) + end + end + + describe "reverse_run" do + it "transliterates correctly with reverse_run: nil" do + a = stage { + sub "a", "b" + } + b = a.reverse + + expect(a.("ab")).to eq("bb") + expect(b.("ab")).to eq("aa") + end + + it "transliterates correctly with reverse_run: true" do + a = stage { + sub "a", "b", reverse_run: true + } + b = 
a.reverse + + expect(a.("ab")).to eq("ab") + expect(b.("ab")).to eq("aa") + end + + it "transliterates correctly with reverse_run: false" do + a = stage { + sub "a", "b", reverse_run: false + } + b = a.reverse + + expect(a.("ab")).to eq("bb") + expect(b.("ab")).to eq("ab") + end + + it "transliterates correctly with reverse_run and parallel" do + a = stage { + parallel { + sub "a", "b", reverse_run: true + sub "c", "d", reverse_run: false + } + } + b = a.reverse + + expect(a.("abcd")).to eq("abdd") + expect(b.("abcd")).to eq("aacd") + end + end + + describe "multistage" do + it "correctly reverses multistage documents" do + a = document("multistage-reversibility") { + stage { + sub "a", "b" + sub "c", "d" + run stage.second + } + + stage(:second) { + sub "e", "f" + } + } + b = a.reverse + + expect(a.("abcdef")).to eq("bbddff") + expect(b.("abcdef")).to eq("aaccee") + end + + it "correctly reverses multistage documents with dont_reverse" do + a = document("multistage-reversibility-dr") { + stage { + sub "a", "b" + sub "c", "d" + run stage.second + } + + stage(:second, dont_reverse: true) { + sub "e", "f" + } + } + b = a.reverse + + expect(a.("abcdef")).to eq("bbddff") + expect(b.("abcdef")).to eq("aaccff") + end + end +end \ No newline at end of file diff --git a/ruby/spec/spec_helper.rb b/spec/spec_helper.rb similarity index 54% rename from ruby/spec/spec_helper.rb rename to spec/spec_helper.rb index e0ec0481..101688ff 100644 --- a/ruby/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -9,6 +9,8 @@ require "interscript" require "interscript/compiler/ruby" require "interscript/compiler/javascript" unless ENV["SKIP_JS"] +require "interscript/compiler/python" unless ENV["SKIP_PYTHON"] +require "interscript/utils/helpers" RSpec.configure do |config| # Enable flags like --only-failures and --next-failure @@ -21,50 +23,17 @@ c.syntax = :expect end - def document name=nil, &block - $example_id ||= 0 - $example_id += 1 - name ||= "example-#{$example_id}" - - 
Interscript::DSL::Document.new(name, &block).node.tap do |i| - $documents ||= {} - $documents[name] = i - end - end - - def stage &block - document { - stage(&block) - } - end + include Interscript::Utils::Helpers def each_compiler &block compilers = [] compilers << Interscript::Interpreter compilers << Interscript::Compiler::Ruby compilers << Interscript::Compiler::Javascript unless ENV["SKIP_JS"] + compilers << Interscript::Compiler::Python unless ENV["SKIP_PYTHON"] compilers.each do |compiler| block.(compiler) end end end - -class Interscript::Node::Document - def call(str, stage=:main, compiler=$compiler || Interscript::Interpreter, **kwargs) - compiler.(self).(str, stage, **kwargs) - end -end - -module Interscript::DSL - class << self - alias original_parse parse - def parse(map_name) - if $documents && $documents[map_name] - $documents[map_name] - else - original_parse(map_name) - end - end - end -end diff --git a/ruby/spec/transliterate_each_spec.rb b/spec/transliterate_each_spec.rb similarity index 100% rename from ruby/spec/transliterate_each_spec.rb rename to spec/transliterate_each_spec.rb