diff --git a/.github/workflows/assets.yml b/.github/workflows/assets.yml index ae572e5e..39c128e6 100644 --- a/.github/workflows/assets.yml +++ b/.github/workflows/assets.yml @@ -4,6 +4,7 @@ on: push: tags: - 'v*' + workflow_dispatch: jobs: assets: @@ -12,18 +13,28 @@ jobs: BUNDLE_WITHOUT: "secryst:jsexec" SKIP_JS: "1" steps: - - name: Checkout repository and submodules + - name: Checkout repository uses: actions/checkout@v2 with: - submodules: true + repository: interscript/interscript + + - name: Run bootstrap script + run: ruby bootstrap.rb + - name: Use Ruby uses: ruby/setup-ruby@v1 with: - ruby-version: 3.0 + ruby-version: "3.0" bundler-cache: true - working-directory: ./ruby + + - name: Install bundle + working-directory: ./ruby + run: bundle install --jobs 4 --retry 3 --with jsexec --without secryst + - name: Generate visualization json - run: pushd ruby; bundle exec rake generate_visualization_json; popd + working-directory: ./ruby + run: bundle exec rake generate_visualization_json + - name: Archive json files from the previous step uses: thedoctor0/zip-release@master with: @@ -32,6 +43,7 @@ jobs: directory: ./ruby/ exclusions: '*.git*' type: zip + - name: Upload artifacts id: upload_vis_json uses: svenstaro/upload-release-action@2.2.1 @@ -41,8 +53,11 @@ jobs: file_glob: true tag: ${{ github.ref }} overwrite: true + - name: Generate metadata - run: pushd ruby; bundle exec rake generate_metadata_json; popd + working-directory: ./ruby + run: bundle exec rake generate_metadata_json + - name: Archive metadata from the previous step uses: thedoctor0/zip-release@master with: @@ -51,6 +66,7 @@ jobs: directory: ./ruby/ exclusions: '*.git*' type: zip + - name: Upload metadata id: upload_metadata uses: svenstaro/upload-release-action@2.2.1 @@ -60,11 +76,13 @@ jobs: asset_name: metadata.json.zip tag: ${{ github.ref }} overwrite: true + - name: Output link run: | echo ${{ steps.upload_vis_json.outputs.browser_download_url }} echo ${{ 
steps.upload_metadata.outputs.browser_download_url }} -# - name: Trigger interscript.org + +# - name: Trigger deploy at interscript.org # uses: peter-evans/repository-dispatch@v1 # with: # token: ${{ secrets.INTERSCRIPT_CI_TOKEN }} diff --git a/.github/workflows/rake.yml b/.github/workflows/rake.yml index 9ab9f3ab..6a789f3e 100644 --- a/.github/workflows/rake.yml +++ b/.github/workflows/rake.yml @@ -2,54 +2,42 @@ name: rake on: push: - branches: [ master, main ] + branches: [ main, v*, ci-check ] tags: [ v* ] pull_request: -defaults: - run: - working-directory: ./ruby - jobs: rspec: name: Test on Ruby ${{ matrix.ruby }} ${{ matrix.os }} runs-on: ${{ matrix.os }} - continue-on-error: ${{ matrix.experimental }} + continue-on-error: true strategy: fail-fast: false matrix: - ruby: [ 2.7, 2.6, 2.5 ] + ruby: [ 3.3, 3.2, 3.1, "3.0", 2.7, 2.6 ] os: [ ubuntu-latest, windows-latest, macos-latest ] - experimental: [ false ] - include: - - ruby: 3.0 - os: 'ubuntu-latest' - experimental: true - - ruby: 3.0 - os: 'windows-latest' - experimental: true - - ruby: 3.0 - os: 'macos-latest' - experimental: true - env: BUNDLE_WITHOUT: "secryst" SKIP_JS: "1" steps: - - name: Checkout repository and submodules + - name: Checkout repository uses: actions/checkout@v2 with: - submodules: true + repository: interscript/interscript + + - name: Run bootstrap script + run: ruby bootstrap.rb - name: Use Ruby uses: ruby/setup-ruby@v1 with: ruby-version: ${{ matrix.ruby }} bundler-cache: true - working-directory: ./ruby - name: Run RSpecs + working-directory: ./ruby run: | + pip install regex bundle install --with=jsexec - bundle exec rspec -f f + bundle exec rspec diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f8d92e20..d8de6a2e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -9,14 +9,17 @@ jobs: release: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - name: Checkout repository + uses: actions/checkout@v2 with: - 
submodules: true + repository: interscript/interscript - - uses: actions/setup-ruby@v1 + - name: Run bootstrap script + run: ruby bootstrap.rb + + - uses: ruby/setup-ruby@v1 with: - ruby-version: '2.7' - architecture: 'x64' + ruby-version: '3.0' - uses: actions/setup-node@v1 with: @@ -24,45 +27,34 @@ jobs: # For now let's install without secryst, as we don't necessarily need it. # We may need to change it once we start to depend on secryst maps. - - run: pushd ruby && bundle install --jobs 4 --retry 3 --with jsexec --without secryst && popd + - name: Install bundle + working-directory: ./ruby + run: bundle install --jobs 4 --retry 3 --with jsexec --without secryst - - name: Test the Ruby package - run: pushd ruby && bundle exec rake && popd + - name: Test Ruby package + working-directory: ./ruby + run: bundle exec rake - - name: Test the JS package - run: pushd js && npm install && npm run prepareMaps && npm test && popd + - name: Test JS package + working-directory: ./js + run: npm install && npm run prepareMaps && npm test - name: Publish to rubygems.org env: RUBYGEMS_API_KEY: ${{secrets.INTERSCRIPT_RUBYGEMS_API_KEY}} run: | gem install gem-release + mkdir -p ~/.gem touch ~/.gem/credentials cat > ~/.gem/credentials << EOF --- :rubygems_api_key: ${RUBYGEMS_API_KEY} EOF chmod 0600 ~/.gem/credentials - pushd js - git status - popd - pushd maps - git status - gem release - popd pushd ruby git status gem release popd - - name: Publish to npmjs.org - env: - NPMJS_TOKEN: ${{secrets.INTERSCRIPT_NPM_TOKEN}} - run: | - pushd js - npm config set //registry.npmjs.org/:_authToken=$NPMJS_TOKEN - npm run prepareMaps - npm publish - popd # Let's keep it commented out for now. Please uncomment it once you are ready with # interscript-api to support Interscript v2. 
diff --git a/.gitignore b/.gitignore index a049b922..6f2070e6 100644 --- a/.gitignore +++ b/.gitignore @@ -4,12 +4,12 @@ /coverage/ /InstalledFiles /pkg/ -/ruby/spec/reports/ -/ruby/spec/examples.txt -/ruby/test/tmp/ -/ruby/test/version_tmp/ -/ruby/tmp/ -/ruby/Gemfile.lock +/spec/reports/ +/spec/examples.txt +/test/tmp/ +/test/version_tmp/ +/tmp/ +/Gemfile.lock # Used by dotenv library to load environment variables. # .env @@ -20,10 +20,10 @@ ## Specific to RubyMotion: .dat* .repl_history -/ruby/build/ +/build/ *.bridgesupport -/ruby/build-iPhoneOS/ -/ruby/build-iPhoneSimulator/ +/build-iPhoneOS/ +/build-iPhoneSimulator/ ## Specific to RubyMotion (use of CocoaPods): # @@ -34,15 +34,15 @@ # vendor/Pods/ ## Documentation cache and generated files: -/ruby/.yardoc/ -/ruby/_yardoc/ -/ruby/doc/ -/ruby/rdoc/ +/.yardoc/ +/_yardoc/ +/doc/ +/rdoc/ ## Environment normalization: -/ruby/.bundle/ -/ruby/vendor/bundle -/ruby/lib/bundler/man/ +/.bundle/ +/vendor/bundle +/lib/bundler/man/ # for a library or gem, you might want to ignore these files since the code is # intended to run in multiple environments; otherwise, check them in: @@ -51,21 +51,29 @@ # .ruby-gemset # unless supporting rvm < 1.11.0 or doing something fancy, ignore this: -/ruby/.rvmrc +/.rvmrc # Don't bundle generated files -/ruby/visualizations -/ruby/json -/ruby/vis_json -/ruby/auth_json -/ruby/metadata.json -/ruby/compiled +/visualizations +/json +/vis_json +/auth_json +/metadata.json +/compiled # Used by RuboCop. Remote config files pulled in from inherit_from directive. 
# .rubocop-https?--* -/js/src/maps -/js/node_modules #misic .idea -.bundle \ No newline at end of file +.bundle/.bundle/ +/.yardoc +/_yardoc/ +/coverage/ +/doc/ +/pkg/ +/spec/reports/ +/tmp/ + +# rspec failure tracking +.rspec_status diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 5e037dd3..00000000 --- a/.gitmodules +++ /dev/null @@ -1,6 +0,0 @@ -[submodule "maps"] - path = maps - url = https://github.com/interscript/maps.git -[submodule "js"] - path = js - url = https://github.com/interscript/interscript-js.git diff --git a/ruby/.rspec b/.rspec similarity index 100% rename from ruby/.rspec rename to .rspec diff --git a/ruby/Gemfile b/Gemfile similarity index 89% rename from ruby/Gemfile rename to Gemfile index 17c184a8..cf4f70d5 100644 --- a/ruby/Gemfile +++ b/Gemfile @@ -26,6 +26,12 @@ unless ENV["SKIP_JS"] end end +unless ENV["SKIP_PYTHON"] + group :pyexec do + gem 'pycall' + end +end + group :rababa do gem 'rababa', "~> 0.1.1" end diff --git a/README.adoc b/README.adoc deleted file mode 100644 index 99a22f52..00000000 --- a/README.adoc +++ /dev/null @@ -1,197 +0,0 @@ -= Interscript: Interoperable Script Conversion Systems, with Ruby and JavaScript runtimes - -image:https://github.com/interscript/interscript/workflows/test/badge.svg["Ruby build status", link="https://github.com/interscript/interscript/actions?workflow=test"] -image:https://github.com/interscript/interscript/workflows/js/badge.svg["JavaScript build status", link="https://github.com/interscript/interscript/actions?workflow=js"] - -== Introduction - -This repository contains interoperable transliteration schemes from: - -* ALA-LC -* BGN/PCGN -* ICAO -* ISO -* UN (by UNGEGN) -* Many, many other script conversion system authorities. - -The goal is to achieve interoperable transliteration schemes allowing quality comparisons. 
- - - -== Demonstration - -These transliteration systems are used in the demo: - -`bgnpcgn-rus-Cyrl-Latn-1947`:: BGN/PCGN Romanization of Russian -`iso-rus-Cyrl-Latn-9-1995`:: ISO 9 Romanization of Russian -`icao-rus-Cyrl-Latn-9303`:: ICAO MRZ Romanization of Russian -`bas-rus-Cyrl-Latn-2017-bss`:: Bulgaria Academy of Science Streamlined System for Russian - -image:docs/demo/20191118-interscript-demo-cast.gif["interscript screencast"] - - -== Installation - -=== Prerequisites - -Interscript depends on Ruby. Once you manage to install Ruby, it's easy. This part -won't work until we release Interscript v2, please use the one below. - -[source,sh] ----- -gem install interscript -v "~>2.0" ----- - -You can also download a local copy of this Git repository, eg. for development -purposes: - -[source,sh] ----- -git clone https://github.com/interscript/lcs -cd lcs/ruby -bundle install ----- - -==== Additional prerequisites for Thai systems - -If you want to transliterate Thai systems, you will need to install some additional -requirements. Please consult: link:docs/Usage_with_Secryst.adoc[Usage with Secryst]. - -== Usage - -Assume you have a file ready in the source script like this: - -[source,sh] ----- -cat <<EOT > rus-Cyrl.txt -Эх, тройка! птица тройка, кто тебя выдумал? знать, у бойкого народа ты -могла только родиться, в той земле, что не любит шутить, а -ровнем-гладнем разметнулась на полсвета, да и ступай считать версты, -пока не зарябит тебе в очи. И не хитрый, кажись, дорожный снаряд, не -железным схвачен винтом, а наскоро живьём с одним топором да долотом -снарядил и собрал тебя ярославский расторопный мужик. Не в немецких -ботфортах ямщик: борода да рукавицы, и сидит чёрт знает на чём; а -привстал, да замахнулся, да затянул песню — кони вихрем, спицы в -колесах смешались в один гладкий круг, только дрогнула дорога, да -вскрикнул в испуге остановившийся пешеход — и вон она понеслась, -понеслась, понеслась! - -Н.В. 
Гоголь -EOT ----- - -You can run `interscript` on this text using different transliteration systems. - -[source,sh] ----- -interscript rus-Cyrl.txt \ - --system=bgnpcgn-rus-Cyrl-Latn-1947 \ - --output=bgnpcgn-rus-Latn.txt - -interscript rus-Cyrl.txt \ - --system=iso-rus-Cyrl-Latn-9-1995 \ - --output=iso-rus-Latn.txt - -interscript rus-Cyrl.txt \ - --system=icao-rus-Cyrl-Latn-9303 \ - --output=icao-rus-Latn.txt - -interscript rus-Cyrl.txt \ - --system=bas-rus-Cyrl-Latn-2017-bss \ - --output=bas-rus-Latn.txt ----- - -It is then easy to see the exact differences in rendering between the systems. - -[source,sh] ----- -diff bgnpcgn-rus-Latn.txt bas-rus-Latn.txt ----- - -You can also run some maps in reverse (warning: not all maps are made to be reversed), -either by changing an order of character sets in a name (for bas-rus-`Cyrl-Latn`-2017-bss -it would be bas-rus-`Latn-Cyrl`-2017-bss) or appending `-reverse` to their name: - -[source,sh] ----- -interscript my_jamos.txt \ - --system=var-kor-Hang-Hang-jamo-reverse \ - --output=my_hangul.txt ----- - -You can also compose some maps, by joining their names with `|` (similar to how bash -pipe works). 
For instance: - -[source,sh] ----- -interscript my_file.txt \ - --system='var-map1-Xxxx-Xxxx|var-map2-Xxxx-Xxxx' \ - --output=my_output.txt ----- - -If you use Interscript from the Git repository, you would call the following command -instead of `interscript`: - -[source,sh] ----- -# Ensure you are in your Git repository root path -ruby/bin/interscript rus-Cyrl.txt \ - --system=bas-rus-Cyrl-Latn-2017-bss \ - --output=bas-rus-Latn.txt ----- - -== Adding transliteration system - -Please consult link:docs/Map_Editing_Guide.adoc[the Map Editing Guide] - -== Integration with Ruby applications - -Please consult link:docs/Integration_with_Ruby_Applications.adoc[the guide for integration with Ruby applications] - -== ISCS system codes - -In accordance with -http://calconnect.gitlab.io/tc-localization/csd-transcription-systems[ISO/CC 24229], -the system code identifying a script conversion system has the following components: - -e.g. `bgnpcgn-rus-Cyrl-Latn-1947`: - -`bgnpcgn`:: the authority identifier -`rus`:: an ISO 639-{1,2,3,5} language code that this system applies to (For 639-2, use (T) code) -`Cyrl`:: an ISO 15924 script code, identifying the source script -`Latn`:: an ISO 15924 script code, identifying the target script -`1947`:: an identifier unit within the authority to identify this system - - -== Covered languages - -Currently the schemes cover Cyrillic, Armenian, Greek, Arabic and Hebrew. 
- - -== Samples to play with - -* `rus-Cyrl-1.txt`: Copied from the XLS output from http://www.primorsk.vybory.izbirkom.ru/region/primorsk?action=show&global=true&root=254017025&tvd=4254017212287&vrn=100100067795849&prver=0&pronetvd=0&region=25&sub_region=25&type=242&vibid=4254017212287 - -* `rus-Cyrl-2.txt`: Copied from the XLS output from http://www.yaroslavl.vybory.izbirkom.ru/region/yaroslavl?action=show&root=764013001&tvd=4764013188704&vrn=4764013188693&prver=0&pronetvd=0&region=76&sub_region=76&type=426&vibid=4764013188704 - - -== References - -Reference documents are located at the -https://github.com/interscript/interscript-references[interscript-references repository]. -Some specifications that have distribution limitations may not be reproduced there. - - -== Links to system definitions - -* https://www.iso.org/committee/48750.html[ISO/TC 46 (see standards published by WG 3)] -* http://geonames.nga.mil/gns/html/romanization.html[BGN/PCGN and BGN Romanization systems (BGN)] -* https://www.gov.uk/government/publications/romanization-systems[BGN/PCGN Romanization systems (PCGN)] -* https://www.loc.gov/catdir/cpso/roman.html[ALA-LC Romanization systems in current use] -* http://catdir.loc.gov/catdir/cpso/roman.html[ALA-LC Romanization systems from 1997] -* http://www.eki.ee/wgrs/[UN Romanization systems] -* http://www.eki.ee/knab/kblatyl2.htm[EKI KNAB systems] - -== Copyright and license - -This is a Ribose project. Copyright Ribose. 
diff --git a/ruby/README.md b/README.md similarity index 100% rename from ruby/README.md rename to README.md diff --git a/ruby/Rakefile b/Rakefile similarity index 83% rename from ruby/Rakefile rename to Rakefile index 34a0dee3..e5ed34b3 100644 --- a/ruby/Rakefile +++ b/Rakefile @@ -14,6 +14,9 @@ task :compile, [:compiler, :target] do |t, args| when "javascript" require "interscript/compiler/javascript" [Interscript::Compiler::Javascript, "js"] + when "python" + require "interscript/compiler/python" + [Interscript::Compiler::Python, "py"] end FileUtils.mkdir_p(args[:target]) @@ -34,23 +37,7 @@ task :compile, [:compiler, :target] do |t, args| File.write(args[:target] + "/" + map + "." + ext, code) end - File.write(args[:target] + "/index.json", maplist.to_json) -end - -task :version, [:ver] do |t, ver| - ver = ver[:ver] - - rubyver = File.read(rubyfile = __dir__+"/lib/interscript/version.rb") - jsver = File.read(jsfile = __dir__+"/../js/package.json") - mapsver = File.read(mapsfile = __dir__+"/../maps/interscript-maps.gemspec") - - rubyver = rubyver.gsub(/(VERSION = ")([0-9a-z.-]*)(")/, "\\1#{ver}\\3") - jsver = jsver.gsub(/("version": ")([0-9a-z.-]*)(")/, "\\1#{ver}\\3") - mapsver = mapsver.gsub(/(INTERSCRIPT_MAPS_VERSION=")([0-9a-z.-]*)(")/, "\\1#{ver}\\3") - - File.write(rubyfile, rubyver) - File.write(jsfile, jsver) - File.write(mapsfile, mapsver) + File.write(args[:target] + "/index.json", maplist.to_json) if args[:compiler] == "javascript" end task :generate_visualization_html do diff --git a/ruby/bin/console b/bin/console similarity index 100% rename from ruby/bin/console rename to bin/console diff --git a/ruby/bin/interscript b/bin/interscript similarity index 100% rename from ruby/bin/interscript rename to bin/interscript diff --git a/ruby/bin/maps_analyze_staging b/bin/maps_analyze_staging similarity index 100% rename from ruby/bin/maps_analyze_staging rename to bin/maps_analyze_staging diff --git a/ruby/bin/maps_debug_compilers b/bin/maps_debug_compilers 
similarity index 100% rename from ruby/bin/maps_debug_compilers rename to bin/maps_debug_compilers diff --git a/ruby/bin/maps_debug_ordering b/bin/maps_debug_ordering similarity index 100% rename from ruby/bin/maps_debug_ordering rename to bin/maps_debug_ordering diff --git a/ruby/bin/maps_debug_ruby_compile b/bin/maps_debug_ruby_compile similarity index 100% rename from ruby/bin/maps_debug_ruby_compile rename to bin/maps_debug_ruby_compile diff --git a/ruby/bin/maps_debug_step_by_step b/bin/maps_debug_step_by_step similarity index 100% rename from ruby/bin/maps_debug_step_by_step rename to bin/maps_debug_step_by_step diff --git a/ruby/bin/maps_optimize_order b/bin/maps_optimize_order similarity index 100% rename from ruby/bin/maps_optimize_order rename to bin/maps_optimize_order diff --git a/ruby/bin/maps_v1_analyze_regexps b/bin/maps_v1_analyze_regexps similarity index 100% rename from ruby/bin/maps_v1_analyze_regexps rename to bin/maps_v1_analyze_regexps diff --git a/ruby/bin/maps_v1_to_v2 b/bin/maps_v1_to_v2 similarity index 100% rename from ruby/bin/maps_v1_to_v2 rename to bin/maps_v1_to_v2 diff --git a/bin/set_version b/bin/set_version new file mode 100755 index 00000000..aa99c3d5 --- /dev/null +++ b/bin/set_version @@ -0,0 +1,16 @@ +#!/usr/bin/env ruby +ver = ARGV[0] +part = ARGV[1] + +rubyver = File.read(rubyfile = __dir__+"/../lib/interscript/version.rb") +jsver = File.read(jsfile = __dir__+"/../../js/package.json") +mapsver = File.read(mapsfile = __dir__+"/../../maps/interscript-maps.gemspec") + +rubyver = rubyver.gsub(/(VERSION = ")([0-9a-z.-]*)(")/, "\\1#{ver}\\3") +jsver = jsver.gsub(/("version": ")([0-9a-z.-]*)(")/, "\\1#{ver}\\3") +mapsver = mapsver.gsub(/(INTERSCRIPT_MAPS_VERSION=")([0-9a-z.-]*)(")/, "\\1#{ver}\\3") + +File.write(rubyfile, rubyver) if %w[all ruby].include? part +File.write(jsfile, jsver) if %w[all js].include? part +File.write(mapsfile, mapsver) if %w[all maps].include? 
part + diff --git a/ruby/bin/setup b/bin/setup similarity index 100% rename from ruby/bin/setup rename to bin/setup diff --git a/docs/Integration_with_Ruby_Applications.adoc b/docs/Integration_with_Ruby_Applications.adoc deleted file mode 100644 index 3d05f406..00000000 --- a/docs/Integration_with_Ruby_Applications.adoc +++ /dev/null @@ -1,85 +0,0 @@ -= Integration with Ruby Applications - -Interscript can be used as a Ruby Gem library to be integrated with other Ruby -applications. - -== Gemfile - -You need to make sure your Gemfile contains the following lines: - -[source,ruby] ----- -source "https://rubygems.org" - -gem "interscript", "~>2.0" ----- - -== Requiring - -In your codebase, if you don't do `Bundler.require`, you will need to add the -following line: - -[source,ruby] ----- -require "interscript" ----- - -== Listing all available maps - -To list all available maps, one must execute the following code: - -[source,ruby] ----- -maps = Interscript.maps ----- - -`maps` will be an array containing all Interscript maps by their name. - -== Transliterating text - -To transliterate test using a given map, like `bas-rus-Cyrl-Latn-2017-bss`, -one must execute: - -[source,ruby] ----- -cache = {} -input = "Хелло" -output = Interscript.transliterate("bas-rus-Cyrl-Latn-2017-bss", - input, - cache) ----- - -You should preserve the `cache` variable for performance reasons. It is optional, -you don't need to (but should) supply it. - -=== Using Ruby compiler - -If performance is of utmost performance for your application and you want to -sacrifice a little bit of loading time for much better performance, you can use -`Interscript::Compiler::Ruby` instead of `Interscript::Interpreter` (which is -used by default). 
- -[source,ruby] ----- -require "interscript/compiler/ruby" - -cache = {} -input = "Хелло" -output = Interscript.transliterate("bas-rus-Cyrl-Latn-2017-bss", - input, - cache, - compiler: Interscript::Compiler::Ruby) ----- - -=== Transliterating in reverse - -To reverse a given string using a map with a name of a form: -`bas-rus-Cyrl-Latn-2017-bss`, change places for Cyrl and Latn. - -To reverse a given string using a map with a name of a form: -`var-swe-Latn-Latn-2021`, append `-reverse` to its name. - -Please note: this only works for Ruby implementation. Other implementations -depend on the Ruby implementation for the purpose of compilation. For those, -you need to compile the map using the Ruby implementation, but the name has -to be given according to the above hint. diff --git a/docs/Interscript_Map_Format.adoc b/docs/Interscript_Map_Format.adoc deleted file mode 100644 index c5d8fd6b..00000000 --- a/docs/Interscript_Map_Format.adoc +++ /dev/null @@ -1,524 +0,0 @@ -= Interscript Map format syntax - -This document describes the DSL-based files with an extension `.iml` or `.imp`. - -An `.imp` file is a file containing a standalone transliteration map. For -instance, a map that can transliterate a Korean file to a Latin file. - -An `.iml` file is a file that contains a library of aliases and stages to be -used by the `.imp` maps. It follows the same format, but does not require the -metadata and tests parts to exist and doesn't allow the `main` stage to exist. -This document describes the map version of the format if it isn't noted -otherwise. - -== Basic syntax - -A `\#` character is a comment character. This means, that the part that follows -a `#` character till the end of the line is ignored by Interscript, but exist to -communicate to a human reader the intention behind the content. In this document -it is most often a hint to a person reading this document. - -A String is a part of the document of a form either: `"content"` or `'content'`. 
-It denotes a group of characters to be used. It can be joined together using a -`+` character like so: `"a" + "b"` which is equal to as if someone wrote just -`"ab"`. - -It can also be joined in the following way: - -[source,ruby] ----- - "a string spanning multiple lines " \ - "on and on and on and on" \ - "and so on" ----- - -The `\\` character written before the end of the line ensures that our parser -treats the next line as a continuation of the previous one. This kind of syntax -makes multiline tests much clearer. - -Except for the strings of the form `'content'`, all those forms can contain -escape forms like `\u0410`, which means "An Unicode character 0410". The usage -of those forms is discouraged in new maps, but possible. A String can also -contain an escape character `\n` which means a line break. - -An array (or a list) is a part of the document of a form `["a", "b", "c"]`. It -means a sequential group of Strings, or other types. - -== Document - -The root part of the `.iml` file is called a document. A map has a format as -follows: - -[source,ruby] ----- -metadata { - # Metadata part comes here -} - -tests { - # Tests part comes here -} - -# A dependency directive may happen zero or more times. It will be described in -# a subsection. -dependency "other-map-or-library", as: shortname - -# This part is optional -aliases { - # Aliases part comes here -} - -stage { - # A stage description comes here -} - -# There may be more than 1 stage, the other stages need to have a name. The -# default stage name is `main`. A name can't happen more than once in a document. - -stage(stage_name) { - # A stage description comes here -} ----- - -=== Dependency - -Dependency is an instruction to be issued only in the document context. It means -that we want to import some aliases or stages from another map or a library. 
- -[source,ruby] ----- -dependency "other-map-or-library", as: shortname ----- - -This instruction will allow us to reference aliases and stages from other -libraries in this form: `map.shortname.stage.stagename` for stages and -`map.shortname.aliasname` for aliases. - -There is a second syntax, mostly useful for loading libraries that will import -the stages and aliases to a global context resulting in possibly more human -readable maps: - -[source,ruby] ----- -dependency "other-map-or-library", as: shortname, import: true ----- - -This form allows to reference other stages and aliases in the following form: -`stage.stagename`, `aliasname` - -It is not possible to load maps using this form, only libraries, because we -can't override the `main` stage. - -The standard library is implicitly imported this way. There's no way or need to -import it explicitly. - -==== Standard library - -All maps depend on a standard library implicitly. This standard library defines -a few useful aliases that may or may not be expressed otherwise. - -Below is a table that describes the aliases defined by the standard library: - -|=== -| `none` | An empty string -| `space` | A space character -| `whitespace` | Any whitespace ascii character (space, tab, line-delimiter, ...) -| `boundary` | A word boundary (see below for what institutes a word character) -| `word` | An ascii word character (a-z, A-Z, 0-9, _) -| `not_word` | Negation of the above -| `alpha` | Any ascii alphabetic character (a-z, A-Z) -| `not_alpha` | Negation of the above -| `digit` | Any ascii digit -| `not_digit` | Negation of the above -| `line_start` | Beginning of a line -| `line_end` | Ending of a line -| `string_start` | Beginning of a string -| `string_end` | Ending of a string -|=== - -Any standard library (or otherwise) aliases can be joined with anything else -using a + command, for example: `line_start + "rest"`. - -== Metadata part - -The metadata part describes our map. 
It follows a YAML syntax, and so contrary to -other parts of the document, it doesn't necessarily conform to all what's written -in the `Basic syntax` part of this guide. - -[source,ruby] ----- -metadata { - # ID of the authority that provided the transliteration rules we are about to implement - authority_id: iso - # ID of the rules, most often the year they were defined - id: 1996-method1 - # The language code of the map - language: iso-639-2:kor - # The source script of our map, in our example Hang for Hangul - source_script: Hang - # The destination script of our map - destination_script: Latn - # The longer name of our map - name: ISO/TR 11941:1996 Information and documentation — Transliteration of Korean script into Latin characters - # The URL where it was published - url: https://www.iso.org/standard/20564.html - # The creation date of our map - creation_date: 1996 - # The adoption date of our map, or empty if not adopted - adoption_date: "" - # The description of our map - description: | - Establishes a system for the transliteration of the characters of Korean script into Latin characters. - Intended to provide a means for international communication of written documents. - - # The notes that describe some parts of our map that we are about to implement - notes: - - A word-initial hard sign 'ъ' is not represented, but instead is left out of the transliteration. - - The romanization follows the dialect spoken in Chechnya rather than other local pronunciations. -} ----- - -== Tests part - -The tests part describes a group of the tests to be executed by the automated -system to verify that the map is defined properly. An example tests part looks -like this: - -[source,ruby] ----- -tests { - test "애기", "aeki" - test "방", "pang" -} ----- - -This means, that we want to test our map to transliterate a string "애기" to -"aeki" and "방" to "pang". 
- -== Aliases part - -An aliases part describes a group of aliases to be used by the stages to -simplify the code of our map. - -Let's suppose that our map refers to "Double consonant jamo" and "Aspirated -consonant jamo" quite extensively. We can alias those - -[source,ruby] ----- -aliases { - def_alias double_cons_jamo, any("ᄁᄄᄈᄍᄊ") - def_alias aspirated_cons_jamo, any("ᄏᄐᄑᄎ") -} ----- - -And later in the stage part refer to them by just `double_cons_jamo`, not -needing to repeat ourselves. - -== Stage part - -A stage part describes a stage, a sequential group of steps to transliterate -a string from a source script code to a destination script code. An example -stage looks like the following: - -[source,ruby] ----- -stage { - run map.hangjamo.stage.main - sub any("ᄀᆨ"), "k" - sub any("ᄏᆿ"), "kh" - parallel { - sub "ᅡ", "a" - sub "ᅥ", "eo" - } -} ----- - -A stage can be named, as described in the Document section. The default name -of a stage is `main`. - -=== `sub` call - -A `sub` call does a substitution of an item (string, character, alias) with -another item. - -[source,ruby] ----- -stage { - sub "source", "destination" -} ----- - -This call allows for some named parameters: - -[cols="2"] -|=== -| `before:` -| Execute this substitution only if the "source" is preceded by what is given - as a parameter, but won't replace it, it will only replace the "source". - -| `after:` -| Same, but this parameter denotes what is used after. - -| `not_before:`, `not_after:` -| Negation of `before:` and `after:`. The substitution will only happen if a - parameter is NOT present before or after the "source". -|=== - -For example: - -[source,ruby] ----- -stage { - sub boundary + "Е", "Ye", not_before: "’" - sub boundary + "е", "ye", not_before: "’" - - sub none, "'", not_before: hangul, after: aspirated_cons -} ----- - -==== Multiple replacements - -In various maps there was a need to document multiple replacements. 
Let's suppose -our character set has a character "a" that can be transliterated to any of the -forms "X", "Y" or "Z". As of now, it means that "a" is always translated to "X", -as it came first. In the future it will be possible to execute such a map in -reverse as well. - -[source,ruby] ----- -stage { - sub "a", any("XYZ") -} ----- - -=== `parallel` block - -A parallel block can be defined as a subsection of a `stage` part. It indicates -that the steps inside need to be executed in parallel. At the current time, only -`sub` calls can be executed in parallel. It also means, that those steps will try -to find the longest substrings first. - -[source,ruby] ----- -stage { - parallel { - sub "А", 'A' - sub "Б", 'B' - sub "В", 'V' - sub "Г", 'G' - } -} ----- - -==== Simple mode - -If there are only rules with simple sub rules, we are using a fast track -implementation. By simple sub rules we mean those rules that lack a before/after -part and ones that only use string and possibly `any` items with concatenation. - -=== `run` call - -The run call runs a stage defined inside the document, or another map or -library. If this map isn't local, a map or library dependency needs to be -declared using the `dependency` call. - -For example: - -[source,ruby] ----- -stage { - # If dependency declared without import: true - run map.hangjamo.stage.main - # If dependency declated with import: true, or we reference a local stage - run stage.remove_spaces -} ----- - -=== Standard library functions - -There are certain conversions that may be hard to be achieved using stages, those -are implemented in respective standard libraries using programming languages. - -For a function named `title_case`, it can be called with the following: - -[source,ruby] ----- -stage { - title_case -} ----- - -A standard library function can take (named) arguments. Those are described in -the table below and they may be omitted if a default value is specified. 
- -==== List of standard library functions - -[options="header"] -|=== -| Function name | Arguments | Sample input | Sample output -| `title_case` | `word_separator: " "` | `"example string"` | `"Example String"` -| `downcase` | | `"HELLO WORLD"` | `"hello world"` -| `compose` | | `"ᄆ"+"ᅮ"` | `"무"` -| `decompose` | | `"무"` | `"ᄆ"+"ᅮ"` -| `separate` | `separator: " "` | `"こんいちは"` | `"こ ん い ち は"` -| `secryst` | `model:` 2+>| Consult: link:Usage_with_Secryst.adoc[Usage with Secryst]. -| `rababa` | `config:` 2+>| Consult: link:Usage_with_Rababa.adoc[Usage with Rababa]. -|=== - -== Items - -Interscript doesn't work purely on Strings, even though Strings are mostly -referenced to by this document. The items can be used in the `alias` and `stage` -context. - -=== String item - -The most basic kind of item. For example `"Г"` means "match Г" or "replace -with Г" depending on usage context. Some contexts will only accept strings, or -aliases to strings. - -=== `+` method - -Items can be concatenated (added together) to denote a complex item. For instance: -`any("ab") + "e"` means "either ae or be" and is equivalent to `any(["ae", "be"])`. - -=== `any` item - -Any denotes some alternative variations of a string. It has 3 forms of call: - -* `any("abcde")` - any character: a, b, c, d or e -* `any(["one", "two"])` - any string: one or two -* `any("a".."z")` - any character from a to z - -Any can be also used with other kinds of items than String, for instance: - -[source,ruby] ----- -stage { - sub any([line_start + "a", "a" + line_end]), none -} ----- - -=== `maybe`, `some`, `maybe_some` items - -If you want a given item to be allowed to be repeated respectively: 0 to 1 times, -1 to Infinity times, 0 to Infinity times, you can surround it with respectively: -`maybe()`, `some()`, `maybe_some()`. 
- -[source,ruby] ----- -stage { - sub "a"+maybe("-")+"b", "AB" # Equivalent to regexp: a-?b - sub "a"+some("-")+"b", "AB" # Equivalent to regexp: a-+b - sub "a"+maybe_some("-")+"b", "AB" # Equivalent to regexp: a-*b -} ----- - -=== `alias` item - -An alias item references an alias. For example `map.other_map.alias_from_other_map` -or simply `a_local_alias_or_an_alias_from_imported_library`. - -=== `capture` and `ref` items - -Sometimes there may be a need to reference a group from input inside output (or -input too). People who know regular expressions are familiar with expressions of -some form of `replace /(a)/, '[\1]'`. Interscript supports this kind of syntax: - -[source,ruby] ----- -stage { - sub capture(any("abc")), "["+ref(1)+"]" -} ----- - -When ran against a string `"abcde"`, this stage will produce an output of -`"[a][b][c]de"`. - -== Reversibility - -Starting with Interscript 2.2 we added reversibility support. In general all -commands, except for metadata support a `reverse_run` keyword. This keyword -is `nil` by default, which means, that it's reversible. (One exception though: -`sub` call, if given a `none` as `to`, defaults to `reverse_run: false`). - -`reverse_run: false` means, that the command is only ran in forward. -`reverse_run: true`, on the other hand, means that the command is only ran in -reverse. - -Example 1: - -[source,ruby] ----- -stage { - sub "a", "b", reverse_run: true - sub "c", "d", reverse_run: false -} ----- - -When ran in forward mode (normal run) on a string "abcde", it gives "abdde". -When ran in reverse mode on a string "abcde", it gives "aacde". - -Example 2: - -[source,ruby] ----- -stage { - parallel { - sub "a", "あ" - sub "o", "お" - sub "i", "い" - } -} ----- - -When ran in forward mode (normal run) on a string "あおい", it gives "aoi". -When ran in reverse more on a string "aoi", it gives "あおい". - -The tests accept `reverse_run:` as well, and as before, it defaults to `nil`. 
-To run a test in only a single direction, you can write it as follows: - -[source,ruby] ----- -tests { - test "привет", "privet", reverse_run: false -} ----- - -Do note that even though a given command is given `reverse_run: true`, it -still needs to be written in forward. As in: it will be reversed. As with tests, -if you want to test a map in reverse that "privet" gives a "привет", you still -need to write "привет" as the first argument, "privet" as the second, and then -you need to supply `reverse_run: true`. - -We understand there may be a need that a given set of rules won't get reversed -(not all kinds of commands are reversible in principle). For this kind of usage, -we created a `dont_reverse: true` argument to `stage`. Note: no other commands -support this argument. An example map of this kind would look like this: - -[source,ruby] ----- -stage { - run stage.reverse_exclusive, reverse_run: true -} - -stage(reverse_exclusive, dont_reverse: true) { - sub 'a', 'b' -} ----- - -This map doesn't have any rules in forward, so when given "abcde" in forward, it -obviously returns "abcde". When given "abcde" in reverse it returns "bbcde" (if -`dont_reverse:` wouldn't be set, it would return "aacde"). - -== Composability - -To compose two maps together, whether in reverse or not, you can supply a name -in the form of: "map1-name|map2-name" everywhere a map name is accepted (in a -CLI utility or as a dependency). - -== Ending notes - -This document described everything Interscript currently supports, but it is -strongly advised to read the existing maps to get a grasp of how those -functionalities can be used best. diff --git a/docs/Maintainers.adoc b/docs/Maintainers.adoc deleted file mode 100644 index 878ebfcf..00000000 --- a/docs/Maintainers.adoc +++ /dev/null @@ -1,37 +0,0 @@ -= Maintainers - -This is a documentation intended for Interscript maintainers. It covers all possible tasks one -may need to execute. 
- -== Releasing - -The following is a set of steps one needs to make to do a release: - -[source,sh] ---- -# We assume this is executed in the main Interscript repository root directory. -# Adjust the V to reflect a correct version -V="2.1.0a1" -# Adjust the B to reflect a correct branch name. For now, master. In the future we may decide on -# how to do stable branches. -B="master" -# Commit command -COMMIT="git commit" -# Ensure we are on the latest repository version and all subrepos are up to date as well. -git checkout $B; git pull; git reset -pushd js; git checkout $B; git pull; git reset; popd -pushd maps; git checkout $B; git pull; git reset; popd -# This is the point when you may want to run tests and ensure everything is correct. -# Run the version update script -pushd ruby; bundle exec rake version[$V]; popd -# Add the new version to the submodules, commit it and tag it -pushd js; git add package.json; $COMMIT -m "Release v$V"; git tag "v$V"; popd -pushd maps; git add interscript-maps.gemspec; $COMMIT -m "Release v$V"; git tag "v$V"; popd -# Add the new version and submodules to the main repo, commit it and tag it -git add js maps ruby/lib/interscript/version.rb; $COMMIT -m "Release v$V"; git tag "v$V" -# Push everything in the correct order -pushd js; git push; git push --tags; popd -pushd maps; git push; git push --tags; popd -git push; git push --tags -# Our new version is released! ---- \ No newline at end of file diff --git a/docs/Map_Editing_Guide.adoc b/docs/Map_Editing_Guide.adoc deleted file mode 100644 index e1b10e2f..00000000 --- a/docs/Map_Editing_Guide.adoc +++ /dev/null @@ -1,50 +0,0 @@ -= Interscript map editing guide - -Transliteration systems stored in a `maps/maps/` directory as Interscript Map files. -You can create a new file and add it to the directory. - -The file should be named as `.imp`, where `system-code` -is in accordance with -http://calconnect.gitlab.io/tc-localization/csd-transcription-systems[ISO/CC 24229]. 
- -== File structure - -The file structure is described in link:Interscript_Map_Format.adoc[Interscript Map Format documentation] - -== Testing transliteration systems - -To test all transliteration systems in the `maps/` directory, run: - -[source,sh] ----- -cd ruby/ -bundle exec rspec ----- - -The command takes `source` texts from the `test` section, transforms -them using `rules` and `charmaps` from the `map` key, and compares the -results with `expected:` text from the `source:` section. - -To test a specific transliteration system, set the environment variable -`TRANSLIT_SYSTEM` to the system code of the desired system -(i.e. the "`basename`" of the system's YAML file): - -[source,sh] ----- -TRANSLIT_SYSTEM=bgnpcgn-rus-Cyrl-Latn-1947 bundle exec rspec spec/interscript_spec.rb ----- - -To test staging maps, which may or may not work, you would need to execute -a slightly different command: - -[source,sh] ----- -TRANSLIT_SYSTEM=bgnpcgn-rus-Cyrl-Latn-1947 INTERSCRIPT_STAGING=1 bundle exec rspec spec/interscript_spec.rb ----- - -== Testing new maps - -We envisioned various ways to extend this library with custom repositories of -maps, but the easiest way would be to run the `interscript` CLI utility, when -you are located in a directory in which you have your .imp files. To install -the `interscript` CLI utility you need to run `gem install interscript`. \ No newline at end of file diff --git a/docs/Usage_with_Rababa.adoc b/docs/Usage_with_Rababa.adoc deleted file mode 100644 index 5ac17347..00000000 --- a/docs/Usage_with_Rababa.adoc +++ /dev/null @@ -1,40 +0,0 @@ -= Usage with Rababa - -https://github.com/interscript/rababa/[RABABA] is the Arabic Diacritization Library -that uses Machine Learning to predict missing diactricts. It is well integrated with -Interscript. 
- -== Using it standalone - -Run: `gem install rababa` - -== Integration with Ruby Applications - -In your Gemfile, add: - -[source,ruby] ----- -source "https://rubygems.org" - -gem "rababa" ----- - -== Usage inside maps - -[source,ruby] ----- -stage { - rababa config: "200" -} ----- - -As of now, Rababa is usable only by the Ruby implementation. - -== Usage from command line - -[source,bash] ----- -interscript input.txt \ - --system=var-ara-Arab-Arab-rababa \ - --output=output.txt ----- \ No newline at end of file diff --git a/docs/Usage_with_Secryst.adoc b/docs/Usage_with_Secryst.adoc deleted file mode 100644 index 36e08456..00000000 --- a/docs/Usage_with_Secryst.adoc +++ /dev/null @@ -1,43 +0,0 @@ -= Usage with Secryst - -Secryst is a seq2seq transformer suited for transliteration. Written in Ruby. -It's installation is a bit tricky, you should consult its own installation guide -(https://github.com/secryst/secryst[at GitHub]). By default we don't use Secryst, -unless you have installed it. - -== Using it standalone - -It's enough to install it. Be sure to consult the guide above. - -== Integration with Ruby Applications - -In your Gemfile, add: - -[source,ruby] ----- -source "https://rubygems.org" - -gem "secryst" ----- - -Create a Secrystfile near your Gemfile with the following, for each model you -want to use in your application. Please consult our Secrystfile to get all the -maps needed to get all the Secryst maps needed. - -[source,ruby] ----- -model "model-name" ----- - -== Usage inside maps - -[source,ruby] ----- -stage { - # ... sub "a", "b" ... - secryst model: "model-name" - # ... sub "c", "d" ... -} ----- - -As of now, Secryst is usable only by the Ruby implementation. 
diff --git a/docs/demo/20191118-interscript-demo-cast.gif b/docs/demo/20191118-interscript-demo-cast.gif index fe881429..a2808c47 100644 Binary files a/docs/demo/20191118-interscript-demo-cast.gif and b/docs/demo/20191118-interscript-demo-cast.gif differ diff --git a/ruby/exe/interscript b/exe/interscript similarity index 100% rename from ruby/exe/interscript rename to exe/interscript diff --git a/ruby/interscript.gemspec b/interscript.gemspec similarity index 100% rename from ruby/interscript.gemspec rename to interscript.gemspec diff --git a/js b/js deleted file mode 160000 index e9e47945..00000000 --- a/js +++ /dev/null @@ -1 +0,0 @@ -Subproject commit e9e479456cd889cd0930520232e71123c50da1b1 diff --git a/ruby/lib/interscript.rb b/lib/interscript.rb similarity index 87% rename from ruby/lib/interscript.rb rename to lib/interscript.rb index 90c859f4..21c9c2fc 100644 --- a/ruby/lib/interscript.rb +++ b/lib/interscript.rb @@ -2,7 +2,14 @@ require "yaml" module Interscript + # An error caused by a lack of some map class MapNotFoundError < StandardError; end + # An error caused by a missing dependency + class ExternalUtilError < StandardError; end + # An error caused by a particular compiler + class SystemConversionError < StandardError; end + # An error caused by an incorrect map implementation + class MapLogicError < StandardError; end class << self def load_path @@ -41,9 +48,9 @@ def transliterate_each(system_code, string, maps={}, &block) load(system_code, maps).(string, each: true, &block) end - def transliterate_file(system_code, input_file, output_file, maps={}) + def transliterate_file(system_code, input_file, output_file, maps={}, compiler: Interscript::Interpreter) input = File.read(input_file) - output = transliterate(system_code, input, maps) + output = transliterate(system_code, input, maps, compiler: compiler) File.open(output_file, 'w') do |f| f.puts(output) @@ -118,11 +125,12 @@ def rababa_provision(model_name, model_uri) ([ENV["RABABA_DATA"]] + 
possible_paths).compact.each do |path| FileUtils.mkdir_p(path) - write_path = path unless write_path + write_path = path + break rescue end - - raise StandardError, "Can't find a writable path for Rababa. Consider setting a RABABA_DATA environment variable" unless write_path + + raise ExternalUtilError, "Can't find a writable path for Rababa. Consider setting a RABABA_DATA environment variable" unless write_path model_path = "#{write_path}/model-#{model_name}.onnx" @@ -130,8 +138,8 @@ def rababa_provision(model_name, model_uri) if File.exist?(model_path) && File.mtime(model_path) + 3600 >= Time.now return model_path else - data = URI.open(model_uri).read - File.write(model_path, data) + data = URI.open(model_uri, encoding: "BINARY").read + File.binwrite(model_path, data) return model_path end end diff --git a/lib/interscript/command.rb b/lib/interscript/command.rb new file mode 100644 index 00000000..aa017e27 --- /dev/null +++ b/lib/interscript/command.rb @@ -0,0 +1,79 @@ +require 'thor' +require 'interscript' +require 'json' + +module Interscript + # Command line interface + class Command < Thor + desc '', 'Transliterate text' + option :system, aliases: '-s', required: true, desc: 'Transliteration system' + option :output, aliases: '-o', required: false, desc: 'Output file' + option :compiler, aliases: '-c', required: false, desc: 'Compiler (eg. Interscript::Compiler::Python)' + # Was this option really well thought out? The last parameter is a cache, isn't it? 
+ #option :map, aliases: '-m', required: false, default: "{}", desc: 'Transliteration mapping json' + + def translit(input) + compiler = if options[:compiler] + compiler = options[:compiler].split("::").last.downcase + require "interscript/compiler/#{compiler}" + Object.const_get(options[:compiler]) + else + Interscript::Interpreter + end + + if options[:output] + Interscript.transliterate_file(options[:system], input, options[:output], compiler: compiler) + else + puts Interscript.transliterate(options[:system], IO.read(input), compiler: compiler) + end + end + + desc 'list', 'Prints allowed transliteration systems' + def list + Interscript.maps(load_path: true).each do |path| + puts path + end + end + + desc 'stats', 'Prints statistics about the maps we have' + def stats + maps = Interscript.maps(load_path: true) + parsed_maps = maps.map { |i| [i, Interscript.parse(i)] }.to_h + maps_by_rule_count = parsed_maps.transform_values do |map| + map.stages.values.map { |i| i.children.map { |j| j.is_a?(Interscript::Node::Group) ? 
j.children : j } }.flatten.count + end + + authorities, languages, source_scripts, target_scripts = 4.times.map do |i| + maps.group_by { |map| map.split('-')[i] } + end + + puts <<~END + Languages supported: #{languages.keys.count} + Source scripts supported: #{source_scripts.keys.count} + Target scripts supported: #{target_scripts.keys.count} + Authorities supported: #{authorities.keys.count} + Total number of rules in Interscript: #{maps_by_rule_count.values.sum} + + END + + authorities.each do |auth, auth_maps| + rule_counts = auth_maps.map { |i| maps_by_rule_count[i] } + puts <<~END + Authority #{auth}: + * Conversion systems: #{auth_maps.count} + * Total number of rules: #{rule_counts.sum} + + END + end + + puts <<~END + Interesting facts: + * #{maps_by_rule_count.max_by { |i| i.last }.first} has the most rules + * Authority #{authorities.max_by { |i| i.last.count }.first} has the most systems + * Language #{languages.max_by { |i| i.last.count }.first} has the most systems + * Source script #{source_scripts.max_by { |i| i.last.count }.first} has the most systems + * Target script #{target_scripts.max_by { |i| i.last.count }.first} has the most systems + END + end + end +end diff --git a/ruby/lib/interscript/compiler.rb b/lib/interscript/compiler.rb similarity index 100% rename from ruby/lib/interscript/compiler.rb rename to lib/interscript/compiler.rb diff --git a/ruby/lib/interscript/compiler/javascript.rb b/lib/interscript/compiler/javascript.rb similarity index 86% rename from ruby/lib/interscript/compiler/javascript.rb rename to lib/interscript/compiler/javascript.rb index 4b2ab338..07a79ed4 100644 --- a/ruby/lib/interscript/compiler/javascript.rb +++ b/lib/interscript/compiler/javascript.rb @@ -70,11 +70,11 @@ def compile_rule(r, map = @map, wrapper = false) # Try to build a tree a = [] r.children.each do |i| - raise ArgumentError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i - raise ArgumentError, "Can't parallelize rules with 
:before" if i.before - raise ArgumentError, "Can't parallelize rules with :after" if i.after - raise ArgumentError, "Can't parallelize rules with :not_before" if i.not_before - raise ArgumentError, "Can't parallelize rules with :not_after" if i.not_after + raise Interscript::SystemConversionError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i + raise Interscript::SystemConversionError, "Can't parallelize rules with :before" if i.before + raise Interscript::SystemConversionError, "Can't parallelize rules with :after" if i.after + raise Interscript::SystemConversionError, "Can't parallelize rules with :not_before" if i.not_before + raise Interscript::SystemConversionError, "Can't parallelize rules with :not_after" if i.not_after next if i.reverse_run == true a << [compile_item(i.from, map, :par), compile_item(i.to, map, :parstr)] @@ -89,7 +89,7 @@ def compile_rule(r, map = @map, wrapper = false) # Otherwise let's build a megaregexp a = [] Interscript::Stdlib.deterministic_sort_by_max_length(r.children).each do |i| - raise ArgumentError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i + raise Interscript::SystemConversionError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i next if i.reverse_run == true a << [build_regexp(i, map), compile_item(i.to, map, :parstr)] @@ -122,7 +122,7 @@ def compile_rule(r, map = @map, wrapper = false) end c += "s = Interscript.transliterate(#{stage.doc_name.to_json}, s, #{stage.name.to_json});\n" else - raise ArgumentError, "Can't compile unhandled #{r.class}" + raise Interscript::SystemConversionError, "Can't compile unhandled #{r.class}" end c end @@ -157,17 +157,17 @@ def compile_item i, doc=@map, target=nil astr = if i.map d = doc.dep_aliases[i.map].document a = d.imported_aliases[i.name] - raise ArgumentError, "Alias #{i.name} of #{i.stage.map} not found" unless a + raise Interscript::SystemConversionError, "Alias #{i.name} of #{i.stage.map} not found" 
unless a "Interscript.get_alias_ALIASTYPE(#{a.doc_name.to_json}, #{a.name.to_json})" elsif Interscript::Stdlib::ALIASES.include?(i.name) if target != :re && Interscript::Stdlib.re_only_alias?(i.name) - raise ArgumentError, "Can't use #{i.name} in a #{target} context" + raise Interscript::SystemConversionError, "Can't use #{i.name} in a #{target} context" end stdlib_alias = true "Interscript.aliases.#{i.name}" else a = doc.imported_aliases[i.name] - raise ArgumentError, "Alias #{i.name} not found" unless a + raise Interscript::SystemConversionError, "Alias #{i.name} not found" unless a "Interscript.get_alias_ALIASTYPE(#{a.doc_name.to_json}, #{a.name.to_json})" end @@ -205,7 +205,7 @@ def compile_item i, doc=@map, target=nil end when Interscript::Node::Item::CaptureGroup if target != :re - raise ArgumentError, "Can't use a CaptureGroup in a #{target} context" + raise Interscript::SystemConversionError, "Can't use a CaptureGroup in a #{target} context" end "(" + compile_item(i.data, doc, target) + ")" when Interscript::Node::Item::Maybe, @@ -217,7 +217,7 @@ def compile_item i, doc=@map, target=nil Interscript::Node::Item::MaybeSome => "*" }[i.class] if target == :par - raise ArgumentError, "Can't use a MaybeSome in a #{target} context" + raise Interscript::SystemConversionError, "Can't use a MaybeSome in a #{target} context" end if Interscript::Node::Item::String === i.data && i.data.data.length != 1 "(?:" + compile_item(i.data, doc, target) + ")" + resuffix @@ -226,7 +226,7 @@ def compile_item i, doc=@map, target=nil end when Interscript::Node::Item::CaptureRef if target == :par - raise ArgumentError, "Can't use CaptureRef in parallel mode" + raise Interscript::SystemConversionError, "Can't use CaptureRef in parallel mode" elsif target == :re "\\\\#{i.id}" elsif target == :str @@ -234,7 +234,7 @@ def compile_item i, doc=@map, target=nil end when Interscript::Node::Item::Any if target == :str - raise ArgumentError, "Can't use Any in a string context" # A linter could 
find this! + raise Interscript::SystemConversionError, "Can't use Any in a string context" # A linter could find this! elsif target == :par i.data.map(&:data) elsif target == :re diff --git a/lib/interscript/compiler/python.rb b/lib/interscript/compiler/python.rb new file mode 100644 index 00000000..13722091 --- /dev/null +++ b/lib/interscript/compiler/python.rb @@ -0,0 +1,331 @@ +require 'pycall' + +class Interscript::Compiler::Python < Interscript::Compiler + def escape(val) + case val + when String, Integer + val.inspect + when Symbol + val.to_s.inspect + when Hash + "{"+ + val.map { |k,v| "#{escape k}:#{escape v}" }.join(",")+ + "}" + when Array + "[" + val.map { |i| escape i }.join(",") + "]" + when nil + "None" + else + pp [:error, val] + exit! + end + end + + def re_escape(val) + @pycall_regex ||= PyCall.import_module("regex") + @pycall_regex.escape(val).gsub("\\", "\\\\\\\\").gsub('"', "\\\\\"") + end + + def new_regexp(str) + "re.compile(\"#{str}\", re.MULTILINE)" + end + + def indent + @indent += 4 + yield + @indent -= 4 + end + + def emit(code) + @code << (" " * @indent) << code << "\n" + code + end + + def compile(map, debug: false) + @indent = 0 + @map = map + @debug = debug + @parallel_trees = {} + @parallel_regexps = {} + @code = "" + emit "import interscript" + emit "import regex as re" + map.dependencies.map(&:full_name).each do |dep| + emit "interscript.load_map(#{escape dep})" + end + + emit "interscript.stdlib.define_map(#{escape map.name})" + + map.aliases.each do |name, value| + val = compile_item(value.data, map, :str) + emit "interscript.stdlib.add_map_alias(#{escape map.name}, #{escape name}, #{val})" + val = "\"" + compile_item(value.data, map, :re) + "\"" + emit "interscript.stdlib.add_map_alias_re(#{escape map.name}, #{escape name}, #{val})" + end + + map.stages.each do |_, stage| + compile_rule(stage, @map, true) + end + @parallel_trees.each do |k,v| + emit "_PTREE_#{k} = #{escape v}" + end + @parallel_regexps.each do |k,v| + v = 
%{["#{v[0]}", #{escape v[1]}]} + emit "_PRE_#{k} = #{v}" + end + end + + def parallel_regexp_compile(subs_hash) + # puts subs_hash.inspect + regexp = subs_hash.each_with_index.map do |p,i| + "(?P<_%d>%s)" % [i,p[0]] + end.join("|") + subs_regexp = regexp + # puts subs_regexp.inspect + end + + def compile_rule(r, map = @map, wrapper = false) + return if r.reverse_run == true + case r + when Interscript::Node::Stage + if @debug + emit "if not hasattr(interscript, 'map_debug'):" + indent { emit "interscript.map_debug = []" } + end + emit "def _stage_#{r.name}(s):" + indent do + r.children.each do |t| + comp = compile_rule(t, map) + emit %{interscript.map_debug.append([s, #{escape @map.name.to_s}, #{escape r.name.to_s}, #{escape t.inspect}, #{escape comp}])} if @debug + end + emit "return s\n" + end + emit "interscript.stdlib.add_map_stage(#{escape @map.name}, #{escape r.name}, _stage_#{r.name})" + when Interscript::Node::Group::Parallel + begin + # Try to build a tree + a = [] + r.children.each do |i| + raise Interscript::SystemConversionError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i + raise Interscript::SystemConversionError, "Can't parallelize rules with :before" if i.before + raise Interscript::SystemConversionError, "Can't parallelize rules with :after" if i.after + raise Interscript::SystemConversionError, "Can't parallelize rules with :not_before" if i.not_before + raise Interscript::SystemConversionError, "Can't parallelize rules with :not_after" if i.not_after + + next if i.reverse_run == true + a << [compile_item(i.from, map, :par), compile_item(i.to, map, :parstr)] + end + ah = a.hash.abs + unless @parallel_trees.include? 
ah + tree = Interscript::Stdlib.parallel_replace_compile_tree(a) + @parallel_trees[ah] = tree + end + emit "s = interscript.stdlib.parallel_replace_tree(s, _PTREE_#{ah})" + rescue + # Otherwise let's build a megaregexp + a = [] + Interscript::Stdlib.deterministic_sort_by_max_length(r.children).each do |i| + raise Interscript::SystemConversionError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i + + next if i.reverse_run == true + a << [build_regexp(i, map), compile_item(i.to, map, :parstr)] + end + ah = a.hash.abs + unless @parallel_regexps.include? ah + re = parallel_regexp_compile(a) + @parallel_regexps[ah] = [re, a.map(&:last)] + end + emit "s = interscript.stdlib.parallel_regexp_gsub(s, *_PRE_#{ah})" + end + when Interscript::Node::Rule::Sub + from = new_regexp build_regexp(r, map) + if r.to == :upcase + to = 'interscript.stdlib.upper' + elsif r.to == :downcase + to = 'interscript.stdlib.lower' + else + to = compile_item(r.to, map, :str) + end + emit "s = #{from}.sub(#{to}, s)" + when Interscript::Node::Rule::Funcall + emit "s = interscript.functions.#{r.name}(s, #{escape r.kwargs})" + when Interscript::Node::Rule::Run + if r.stage.map + doc = map.dep_aliases[r.stage.map].document + stage = doc.imported_stages[r.stage.name] + else + stage = map.imported_stages[r.stage.name] + end + emit "s = interscript.transliterate(#{escape stage.doc_name}, s, #{escape stage.name})" + else + raise Interscript::SystemConversionError, "Can't compile unhandled #{r.class}" + end + end + + def build_regexp(r, map=@map) + from = compile_item(r.from, map, :re) + before = compile_item(r.before, map, :re) if r.before + after = compile_item(r.after, map, :re) if r.after + not_before = compile_item(r.not_before, map, :re) if r.not_before + not_after = compile_item(r.not_after, map, :re) if r.not_after + + re = "" + re += "(?<=#{before})" if before + re += "(? "?" 
, + Interscript::Node::Item::Some => "+" , + Interscript::Node::Item::MaybeSome => "*" }[i.class] + + if target == :par + raise Interscript::SystemConversionError, "Can't use a Maybe in a #{target} context" + end + if Interscript::Node::Item::String === i.data && i.data.data.length != 1 + "(?:" + compile_item(i.data, doc, target) + ")" + resuffix + else + compile_item(i.data, doc, target) + resuffix + end + when Interscript::Node::Item::CaptureRef + if target == :par + raise Interscript::SystemConversionError, "Can't use CaptureRef in parallel mode" + elsif target == :re + "\\\\#{i.id}" + elsif target == :str + "\"\\\\#{i.id}\"" + end + when Interscript::Node::Item::Any + if target == :str + raise Interscript::SystemConversionError, "Can't use Any in a string context" # A linter could find this! + elsif target == :par + i.data.map(&:data) + elsif target == :re + case i.value + when Array + data = i.data.map { |j| compile_item(j, doc, target) } + "(?:"+data.join("|")+")" + when String + "[#{re_escape(i.value)}]" + when Range + "[#{re_escape(i.value.first)}-#{re_escape(i.value.last)}]" + end + end + end + end + + @maps_loaded = {} + @ctx = nil + class << self + attr_accessor :maps_loaded + attr_accessor :ctx + end + + def load + if !self.class.maps_loaded[@map.name] + @map.dependencies.each do |dep| + dep = dep.full_name + if !self.class.maps_loaded[dep] + Interscript.load(dep, compiler: self.class).load + end + end + + ctx = self.class.ctx + python_src_path = File.join(__dir__, '..', '..', '..', '..', 'python', 'src') + unless ctx + PyCall.sys.path.append(python_src_path) + self.class.ctx = PyCall.import_module("interscript") + end + #puts @code + Dir.mkdir("#{python_src_path}/interscript/maps") rescue nil + File.write("#{python_src_path}/interscript/maps/#{@map.name}.py", @code) + self.class.ctx.load_map(@map.name) + + self.class.maps_loaded[@map.name] = true + end + end + + def call(str, stage=:main) + load + self.class.ctx.transliterate(@map.name, str, 
stage.to_s) + end + + def self.read_debug_data + (ctx['map_debug'] || []).map(&:to_a).to_a + end + + def self.reset_debug_data + ctx['map_debug'].clear + end +end diff --git a/ruby/lib/interscript/compiler/ruby.rb b/lib/interscript/compiler/ruby.rb similarity index 85% rename from ruby/lib/interscript/compiler/ruby.rb rename to lib/interscript/compiler/ruby.rb index 78788ad3..dcdefcdf 100644 --- a/ruby/lib/interscript/compiler/ruby.rb +++ b/lib/interscript/compiler/ruby.rb @@ -60,11 +60,11 @@ def compile_rule(r, map = @map, wrapper = false) # Try to build a tree a = [] r.children.each do |i| - raise ArgumentError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i - raise ArgumentError, "Can't parallelize rules with :before" if i.before - raise ArgumentError, "Can't parallelize rules with :after" if i.after - raise ArgumentError, "Can't parallelize rules with :not_before" if i.not_before - raise ArgumentError, "Can't parallelize rules with :not_after" if i.not_after + raise Interscript::SystemConversionError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i + raise Interscript::SystemConversionError, "Can't parallelize rules with :before" if i.before + raise Interscript::SystemConversionError, "Can't parallelize rules with :after" if i.after + raise Interscript::SystemConversionError, "Can't parallelize rules with :not_before" if i.not_before + raise Interscript::SystemConversionError, "Can't parallelize rules with :not_after" if i.not_after next if i.reverse_run == true a << [compile_item(i.from, map, :par), compile_item(i.to, map, :parstr)] @@ -79,7 +79,7 @@ def compile_rule(r, map = @map, wrapper = false) # Otherwise let's build a megaregexp a = [] Interscript::Stdlib.deterministic_sort_by_max_length(r.children).each do |i| - raise ArgumentError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i + raise Interscript::SystemConversionError, "Can't parallelize #{i.class}" unless 
Interscript::Node::Rule::Sub === i next if i.reverse_run == true a << [build_regexp(i, map), compile_item(i.to, map, :parstr)] @@ -112,7 +112,7 @@ def compile_rule(r, map = @map, wrapper = false) end c += "s = Interscript::Maps.transliterate(#{stage.doc_name.inspect}, s, #{stage.name.inspect})\n" else - raise ArgumentError, "Can't compile unhandled #{r.class}" + raise Interscript::SystemConversionError, "Can't compile unhandled #{r.class}" end c end @@ -146,17 +146,17 @@ def compile_item i, doc=@map, target=nil astr = if i.map d = doc.dep_aliases[i.map].document a = d.imported_aliases[i.name] - raise ArgumentError, "Alias #{i.name} of #{i.stage.map} not found" unless a + raise Interscript::SystemConversionError, "Alias #{i.name} of #{i.stage.map} not found" unless a "Interscript::Maps.get_alias_ALIASTYPE(#{a.doc_name.inspect}, #{a.name.inspect})" elsif Interscript::Stdlib::ALIASES.include?(i.name) if target != :re && Interscript::Stdlib.re_only_alias?(i.name) - raise ArgumentError, "Can't use #{i.name} in a #{target} context" + raise Interscript::SystemConversionError, "Can't use #{i.name} in a #{target} context" end stdlib_alias = true "Interscript::Stdlib::ALIASES[#{i.name.inspect}]" else a = doc.imported_aliases[i.name] - raise ArgumentError, "Alias #{i.name} not found" unless a + raise Interscript::SystemConversionError, "Alias #{i.name} not found" unless a "Interscript::Maps.get_alias_ALIASTYPE(#{a.doc_name.inspect}, #{a.name.inspect})" end @@ -194,7 +194,7 @@ def compile_item i, doc=@map, target=nil end when Interscript::Node::Item::CaptureGroup if target != :re - raise ArgumentError, "Can't use a CaptureGroup in a #{target} context" + raise Interscript::SystemConversionError, "Can't use a CaptureGroup in a #{target} context" end "(" + compile_item(i.data, doc, target) + ")" when Interscript::Node::Item::Maybe, @@ -206,7 +206,7 @@ def compile_item i, doc=@map, target=nil Interscript::Node::Item::MaybeSome => "*" }[i.class] if target == :par - raise 
ArgumentError, "Can't use a Maybe in a #{target} context" + raise Interscript::SystemConversionError, "Can't use a Maybe in a #{target} context" end if Interscript::Node::Item::String === i.data && i.data.data.length != 1 "(?:" + compile_item(i.data, doc, target) + ")" + resuffix @@ -215,7 +215,7 @@ def compile_item i, doc=@map, target=nil end when Interscript::Node::Item::CaptureRef if target == :par - raise ArgumentError, "Can't use CaptureRef in parallel mode" + raise Interscript::SystemConversionError, "Can't use CaptureRef in parallel mode" elsif target == :re "\\#{i.id}" elsif target == :str @@ -223,7 +223,7 @@ def compile_item i, doc=@map, target=nil end when Interscript::Node::Item::Any if target == :str - raise ArgumentError, "Can't use Any in a string context" # A linter could find this! + raise Interscript::SystemConversionError, "Can't use Any in a string context" # A linter could find this! elsif target == :par i.data.map(&:data) elsif target == :re diff --git a/ruby/lib/interscript/detector.rb b/lib/interscript/detector.rb similarity index 100% rename from ruby/lib/interscript/detector.rb rename to lib/interscript/detector.rb diff --git a/ruby/lib/interscript/dsl.rb b/lib/interscript/dsl.rb similarity index 90% rename from ruby/lib/interscript/dsl.rb rename to lib/interscript/dsl.rb index f25a0090..60301454 100644 --- a/ruby/lib/interscript/dsl.rb +++ b/lib/interscript/dsl.rb @@ -67,7 +67,7 @@ def self.parse(map_name, reverse: true) ruby << l end end - raise ArgumentError, "metadata stage isn't terminated" if md_reading + raise Interscript::MapLogicError, "metadata stage isn't terminated" if md_reading ruby, yaml = ruby.join("\n"), yaml.join("\n") obj = Interscript::DSL::Document.new(map_name) @@ -76,7 +76,12 @@ def self.parse(map_name, reverse: true) yaml = if yaml =~ /\A\s*\z/ {} else - YAML.load(yaml, exc_fname) + unsafe_load = if YAML.respond_to? 
:unsafe_load + :unsafe_load + else + :load + end + YAML.public_send(unsafe_load, yaml, filename: exc_fname) end md = Interscript::DSL::Metadata.new(yaml: true, map_name: map_name, library: library) do diff --git a/ruby/lib/interscript/dsl/aliases.rb b/lib/interscript/dsl/aliases.rb similarity index 82% rename from ruby/lib/interscript/dsl/aliases.rb rename to lib/interscript/dsl/aliases.rb index 62e6af65..da9c831f 100644 --- a/ruby/lib/interscript/dsl/aliases.rb +++ b/lib/interscript/dsl/aliases.rb @@ -14,7 +14,7 @@ def def_alias(name, value) end unless Symbol === name - raise TypeError, "Alias name must be a Symbol, given #{name.class}" + raise Interscript::SystemConversionError, "Alias name must be a Symbol, given #{name.class}" end puts "def_alias(#{name.inspect}, #{thing.inspect})" if $DEBUG diff --git a/ruby/lib/interscript/dsl/document.rb b/lib/interscript/dsl/document.rb similarity index 100% rename from ruby/lib/interscript/dsl/document.rb rename to lib/interscript/dsl/document.rb diff --git a/ruby/lib/interscript/dsl/group.rb b/lib/interscript/dsl/group.rb similarity index 92% rename from ruby/lib/interscript/dsl/group.rb rename to lib/interscript/dsl/group.rb index 4a71e1d0..1283a16f 100644 --- a/ruby/lib/interscript/dsl/group.rb +++ b/lib/interscript/dsl/group.rb @@ -10,7 +10,7 @@ def initialize(&block) def run(stage, **kwargs) if stage.class != Interscript::Node::Item::Stage - raise TypeError, "I::Node::Item::Stage expected, got #{stage.class}" + raise Interscript::MapLogicError, "I::Node::Item::Stage expected, got #{stage.class}" end @node.children << Interscript::Node::Rule::Run.new(stage, **kwargs) end diff --git a/ruby/lib/interscript/dsl/group/parallel.rb b/lib/interscript/dsl/group/parallel.rb similarity index 100% rename from ruby/lib/interscript/dsl/group/parallel.rb rename to lib/interscript/dsl/group/parallel.rb diff --git a/ruby/lib/interscript/dsl/items.rb b/lib/interscript/dsl/items.rb similarity index 90% rename from 
ruby/lib/interscript/dsl/items.rb rename to lib/interscript/dsl/items.rb index ef286128..6b80d64b 100644 --- a/ruby/lib/interscript/dsl/items.rb +++ b/lib/interscript/dsl/items.rb @@ -55,7 +55,7 @@ module Maps class << self # Select a remote map def [] map - Symbol === map or raise TypeError, "A map name must be a Symbol, not #{alias_name.class}" + Symbol === map or raise Interscript::MapLogicError, "A map name must be a Symbol, not #{alias_name.class}" Map.new(map) end alias method_missing [] @@ -68,7 +68,7 @@ def initialize name; @name = name; end # Implementation of `map.x.aliasname` def [] alias_name - Symbol === alias_name or raise TypeError, "An alias name must be a Symbol, not #{alias_name.class}" + Symbol === alias_name or raise Interscript::MapLogicError, "An alias name must be a Symbol, not #{alias_name.class}" Interscript::Node::Item::Alias.new(alias_name, map: @name) end alias method_missing [] diff --git a/ruby/lib/interscript/dsl/metadata.rb b/lib/interscript/dsl/metadata.rb similarity index 82% rename from ruby/lib/interscript/dsl/metadata.rb rename to lib/interscript/dsl/metadata.rb index 7feb4b59..85c19497 100644 --- a/ruby/lib/interscript/dsl/metadata.rb +++ b/lib/interscript/dsl/metadata.rb @@ -4,7 +4,7 @@ class Interscript::DSL::Metadata attr_accessor :node def initialize(yaml: false, map_name: "", library: true, &block) - raise ArgumentError, "Can't evaluate metadata from Ruby context" unless yaml + raise Interscript::MapLogicError, "Can't evaluate metadata from Ruby context" unless yaml @map_name = map_name @node = Interscript::Node::MetaData.new self.instance_exec(&block) @@ -20,13 +20,12 @@ def initialize(yaml: false, map_name: "", library: true, &block) STANDARD_STRING_KEYS = %i{authority_id id language source_script destination_script - name url creation_date adoption_date description - character source confirmation_date} + name creation_date adoption_date description + character source confirmation_date original_description} - 
STANDARD_ARRAY_KEYS = %i{notes} + STANDARD_ARRAY_KEYS = %i{notes implementation_notes original_notes url} - NONSTANDARD_KEYS = %i{special_rules original_description original_notes - implementation_notes} + NONSTANDARD_KEYS = %i{special_rules} NECESSARY_KEYS = %i{name language source_script destination_script} diff --git a/ruby/lib/interscript/dsl/stage.rb b/lib/interscript/dsl/stage.rb similarity index 100% rename from ruby/lib/interscript/dsl/stage.rb rename to lib/interscript/dsl/stage.rb diff --git a/ruby/lib/interscript/dsl/symbol_mm.rb b/lib/interscript/dsl/symbol_mm.rb similarity index 100% rename from ruby/lib/interscript/dsl/symbol_mm.rb rename to lib/interscript/dsl/symbol_mm.rb diff --git a/ruby/lib/interscript/dsl/tests.rb b/lib/interscript/dsl/tests.rb similarity index 100% rename from ruby/lib/interscript/dsl/tests.rb rename to lib/interscript/dsl/tests.rb diff --git a/ruby/lib/interscript/interpreter.rb b/lib/interscript/interpreter.rb similarity index 85% rename from ruby/lib/interscript/interpreter.rb rename to lib/interscript/interpreter.rb index 5def726c..7db1dfb2 100644 --- a/ruby/lib/interscript/interpreter.rb +++ b/lib/interscript/interpreter.rb @@ -92,11 +92,11 @@ def execute_rule r # Try to build a tree subs_array = [] r.children.each do |i| - raise ArgumentError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i - raise ArgumentError, "Can't parallelize rules with :before" if i.before - raise ArgumentError, "Can't parallelize rules with :after" if i.after - raise ArgumentError, "Can't parallelize rules with :not_before" if i.not_before - raise ArgumentError, "Can't parallelize rules with :not_after" if i.not_after + raise Interscript::SystemConversionError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i + raise Interscript::SystemConversionError, "Can't parallelize rules with :before" if i.before + raise Interscript::SystemConversionError, "Can't parallelize rules with :after" if i.after + 
raise Interscript::SystemConversionError, "Can't parallelize rules with :not_before" if i.not_before + raise Interscript::SystemConversionError, "Can't parallelize rules with :not_after" if i.not_after next if i.reverse_run == true subs_array << [build_item(i.from, :par), build_item(i.to, :parstr)] end @@ -109,7 +109,7 @@ def execute_rule r # Otherwise let's build a megaregexp subs_array = [] Interscript::Stdlib.deterministic_sort_by_max_length(r.children).each do |i| # rule.from.max_length gives somewhat better test results, why is that - raise ArgumentError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i + raise Interscript::SystemConversionError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i next if i.reverse_run == true subs_array << [build_regexp(i), build_item(i.to, :parstr)] end @@ -178,16 +178,16 @@ def build_item i, target=nil, doc=@map if i.map d = doc.dep_aliases[i.map].document a = d.imported_aliases[i.name] - raise ArgumentError, "Alias #{i.name} of #{i.stage.map} not found" unless a + raise Interscript::SystemConversionError, "Alias #{i.name} of #{i.stage.map} not found" unless a build_item(a.data, target, d) elsif Interscript::Stdlib::ALIASES.include?(i.name) if target != :re && Interscript::Stdlib.re_only_alias?(i.name) - raise ArgumentError, "Can't use #{i.name} in a #{target} context" + raise Interscript::SystemConversionError, "Can't use #{i.name} in a #{target} context" end Interscript::Stdlib::ALIASES[i.name] else a = doc.imported_aliases[i.name] - raise ArgumentError, "Alias #{i.name} not found" unless a + raise Interscript::SystemConversionError, "Alias #{i.name} not found" unless a build_item(a.data, target, doc) end when Interscript::Node::Item::String @@ -208,7 +208,7 @@ def build_item i, target=nil, doc=@map end when Interscript::Node::Item::CaptureGroup if target == :par - raise ArgumentError, "Can't use a CaptureGroup in a #{target} context" + raise Interscript::SystemConversionError, 
"Can't use a CaptureGroup in a #{target} context" end "(" + build_item(i.data, target, doc) + ")" when Interscript::Node::Item::Maybe, @@ -220,7 +220,7 @@ def build_item i, target=nil, doc=@map Interscript::Node::Item::MaybeSome => "*" }[i.class] if target == :par - raise ArgumentError, "Can't use a MaybeSome in a #{target} context" + raise Interscript::SystemConversionError, "Can't use a MaybeSome in a #{target} context" end if Interscript::Node::Item::String === i.data && i.data.data.length != 1 "(?:" + build_item(i.data, target, doc) + ")" + resuffix @@ -229,13 +229,13 @@ def build_item i, target=nil, doc=@map end when Interscript::Node::Item::CaptureRef if target == :par - raise ArgumentError, "Can't use CaptureRef in parallel mode" + raise Interscript::SystemConversionError, "Can't use CaptureRef in parallel mode" end "\\#{i.id}" when Interscript::Node::Item::Any if target == :str # We may never reach this point - raise ArgumentError, "Can't use Any in a string context" + raise Interscript::SystemConversionError, "Can't use Any in a string context" elsif target == :par i.data.map(&:data) elsif target == :re diff --git a/ruby/lib/interscript/node.rb b/lib/interscript/node.rb similarity index 100% rename from ruby/lib/interscript/node.rb rename to lib/interscript/node.rb diff --git a/ruby/lib/interscript/node/alias_def.rb b/lib/interscript/node/alias_def.rb similarity index 100% rename from ruby/lib/interscript/node/alias_def.rb rename to lib/interscript/node/alias_def.rb diff --git a/ruby/lib/interscript/node/dependency.rb b/lib/interscript/node/dependency.rb similarity index 100% rename from ruby/lib/interscript/node/dependency.rb rename to lib/interscript/node/dependency.rb diff --git a/ruby/lib/interscript/node/document.rb b/lib/interscript/node/document.rb similarity index 100% rename from ruby/lib/interscript/node/document.rb rename to lib/interscript/node/document.rb diff --git a/ruby/lib/interscript/node/group.rb b/lib/interscript/node/group.rb 
similarity index 100% rename from ruby/lib/interscript/node/group.rb rename to lib/interscript/node/group.rb diff --git a/ruby/lib/interscript/node/group/parallel.rb b/lib/interscript/node/group/parallel.rb similarity index 100% rename from ruby/lib/interscript/node/group/parallel.rb rename to lib/interscript/node/group/parallel.rb diff --git a/ruby/lib/interscript/node/group/sequential.rb b/lib/interscript/node/group/sequential.rb similarity index 100% rename from ruby/lib/interscript/node/group/sequential.rb rename to lib/interscript/node/group/sequential.rb diff --git a/ruby/lib/interscript/node/item.rb b/lib/interscript/node/item.rb similarity index 91% rename from ruby/lib/interscript/node/item.rb rename to lib/interscript/node/item.rb index dad62d0e..fe2e81de 100644 --- a/ruby/lib/interscript/node/item.rb +++ b/lib/interscript/node/item.rb @@ -42,7 +42,7 @@ def ==(other) def self.try_convert(i) i = Interscript::Node::Item::String.new(i) if i.class == ::String - raise TypeError, "Wrong type #{i.class}, expected I::Node::Item" unless Interscript::Node::Item === i + raise Interscript::MapLogicError, "Wrong type #{i.class}, expected I::Node::Item" unless Interscript::Node::Item === i i end end diff --git a/ruby/lib/interscript/node/item/alias.rb b/lib/interscript/node/item/alias.rb similarity index 100% rename from ruby/lib/interscript/node/item/alias.rb rename to lib/interscript/node/item/alias.rb diff --git a/ruby/lib/interscript/node/item/any.rb b/lib/interscript/node/item/any.rb similarity index 94% rename from ruby/lib/interscript/node/item/any.rb rename to lib/interscript/node/item/any.rb index f336b196..6e5a786f 100644 --- a/ruby/lib/interscript/node/item/any.rb +++ b/lib/interscript/node/item/any.rb @@ -10,7 +10,7 @@ def initialize data self.value = Interscript::Stdlib::ALIASES[data.name] else puts data.inspect - raise TypeError, "Wrong type #{data[0].class}, excepted Array, String or Range" + raise Interscript::MapLogicError, "Wrong type 
#{data[0].class}, excepted Array, String or Range" end end diff --git a/ruby/lib/interscript/node/item/capture.rb b/lib/interscript/node/item/capture.rb similarity index 100% rename from ruby/lib/interscript/node/item/capture.rb rename to lib/interscript/node/item/capture.rb diff --git a/ruby/lib/interscript/node/item/group.rb b/lib/interscript/node/item/group.rb similarity index 94% rename from ruby/lib/interscript/node/item/group.rb rename to lib/interscript/node/item/group.rb index 8203d637..b503efa1 100644 --- a/ruby/lib/interscript/node/item/group.rb +++ b/lib/interscript/node/item/group.rb @@ -48,7 +48,7 @@ def verify! end if wrong - raise TypeError, "An I::Node::Item::Group can't contain an #{wrong.class} item." + raise Interscript::MapLogicError, "An I::Node::Item::Group can't contain an #{wrong.class} item." end end diff --git a/ruby/lib/interscript/node/item/repeat.rb b/lib/interscript/node/item/repeat.rb similarity index 100% rename from ruby/lib/interscript/node/item/repeat.rb rename to lib/interscript/node/item/repeat.rb diff --git a/ruby/lib/interscript/node/item/stage.rb b/lib/interscript/node/item/stage.rb similarity index 100% rename from ruby/lib/interscript/node/item/stage.rb rename to lib/interscript/node/item/stage.rb diff --git a/ruby/lib/interscript/node/item/string.rb b/lib/interscript/node/item/string.rb similarity index 100% rename from ruby/lib/interscript/node/item/string.rb rename to lib/interscript/node/item/string.rb diff --git a/ruby/lib/interscript/node/metadata.rb b/lib/interscript/node/metadata.rb similarity index 100% rename from ruby/lib/interscript/node/metadata.rb rename to lib/interscript/node/metadata.rb diff --git a/ruby/lib/interscript/node/rule.rb b/lib/interscript/node/rule.rb similarity index 100% rename from ruby/lib/interscript/node/rule.rb rename to lib/interscript/node/rule.rb diff --git a/ruby/lib/interscript/node/rule/funcall.rb b/lib/interscript/node/rule/funcall.rb similarity index 100% rename from 
ruby/lib/interscript/node/rule/funcall.rb rename to lib/interscript/node/rule/funcall.rb diff --git a/ruby/lib/interscript/node/rule/run.rb b/lib/interscript/node/rule/run.rb similarity index 100% rename from ruby/lib/interscript/node/rule/run.rb rename to lib/interscript/node/rule/run.rb diff --git a/ruby/lib/interscript/node/rule/sub.rb b/lib/interscript/node/rule/sub.rb similarity index 98% rename from ruby/lib/interscript/node/rule/sub.rb rename to lib/interscript/node/rule/sub.rb index 6fe88b3b..7086e456 100644 --- a/ruby/lib/interscript/node/rule/sub.rb +++ b/lib/interscript/node/rule/sub.rb @@ -184,7 +184,7 @@ def reverse_transfer from, to node else state[:right][node.id] = true - state[:left][node.id - 1] or raise "Capture count doesn't match" + state[:left][node.id - 1] or raise Interscript::MapLogicError, "Capture count doesn't match" end when Interscript::Node::Item::CaptureGroup state[:left] << node diff --git a/ruby/lib/interscript/node/stage.rb b/lib/interscript/node/stage.rb similarity index 100% rename from ruby/lib/interscript/node/stage.rb rename to lib/interscript/node/stage.rb diff --git a/ruby/lib/interscript/node/tests.rb b/lib/interscript/node/tests.rb similarity index 100% rename from ruby/lib/interscript/node/tests.rb rename to lib/interscript/node/tests.rb diff --git a/ruby/lib/interscript/stdlib.rb b/lib/interscript/stdlib.rb similarity index 96% rename from ruby/lib/interscript/stdlib.rb rename to lib/interscript/stdlib.rb index 36ccdf31..2f9100d7 100644 --- a/ruby/lib/interscript/stdlib.rb +++ b/lib/interscript/stdlib.rb @@ -226,7 +226,7 @@ def self.unseparate(output, separator: " ") def self.secryst(output, model:) require "secryst" rescue nil # Try to load secryst, but don't fail hard if not possible. unless defined? Secryst - raise StandardError, "Secryst is not loaded. Please read docs/Usage_with_Secryst.adoc" + raise Interscript::ExternalUtilError, "Secryst is not loaded. 
Please read docs/Usage_with_Secryst.adoc" end Interscript.secryst_index_locations.each do |remote| Secryst::Provisioning.add_remote(remote) @@ -240,7 +240,7 @@ def self.secryst(output, model:) def self.rababa(output, config:) require "rababa" rescue nil # Try to load rababa, but don't fail hard if not possible. unless defined? Rababa - raise StandardError, "Rababa is not loaded. Please read docs/Usage_with_Rababa.adoc" + raise Interscript::ExternalUtilError, "Rababa is not loaded. Please read docs/Usage_with_Rababa.adoc" end config_value = Interscript.rababa_configs[config] diff --git a/ruby/lib/interscript/utils/helpers.rb b/lib/interscript/utils/helpers.rb similarity index 100% rename from ruby/lib/interscript/utils/helpers.rb rename to lib/interscript/utils/helpers.rb diff --git a/ruby/lib/interscript/utils/regexp_converter.rb b/lib/interscript/utils/regexp_converter.rb similarity index 100% rename from ruby/lib/interscript/utils/regexp_converter.rb rename to lib/interscript/utils/regexp_converter.rb diff --git a/ruby/lib/interscript/version.rb b/lib/interscript/version.rb similarity index 53% rename from ruby/lib/interscript/version.rb rename to lib/interscript/version.rb index 8f3fcab4..72eeefb3 100644 --- a/ruby/lib/interscript/version.rb +++ b/lib/interscript/version.rb @@ -1,3 +1,3 @@ module Interscript - VERSION = "2.3.2" + VERSION = "2.4.5" end diff --git a/ruby/lib/interscript/visualize.rb b/lib/interscript/visualize.rb similarity index 100% rename from ruby/lib/interscript/visualize.rb rename to lib/interscript/visualize.rb diff --git a/ruby/lib/interscript/visualize/group.html.erb b/lib/interscript/visualize/group.html.erb similarity index 100% rename from ruby/lib/interscript/visualize/group.html.erb rename to lib/interscript/visualize/group.html.erb diff --git a/ruby/lib/interscript/visualize/json.rb b/lib/interscript/visualize/json.rb similarity index 100% rename from ruby/lib/interscript/visualize/json.rb rename to lib/interscript/visualize/json.rb 
diff --git a/ruby/lib/interscript/visualize/map.html.erb b/lib/interscript/visualize/map.html.erb similarity index 87% rename from ruby/lib/interscript/visualize/map.html.erb rename to lib/interscript/visualize/map.html.erb index fcb8ac29..222ee8ad 100644 --- a/ruby/lib/interscript/visualize/map.html.erb +++ b/lib/interscript/visualize/map.html.erb @@ -35,7 +35,7 @@ <% case k when :url %> -
<%= h v %> +
<% v.each do |i| %><%= h i %><% if i != v.last %>; <% end %><% end %> <% when :notes, :implementation_notes, :special_rules, :original_notes, :original_description # We ignore notes for now %> <% else %>
<%= h v %> @@ -43,4 +43,4 @@ <% end %> -<%= render_stage(self.map.name, :main) %> \ No newline at end of file +<%= render_stage(self.map.name, :main) %> diff --git a/ruby/lib/interscript/visualize/nodes.rb b/lib/interscript/visualize/nodes.rb similarity index 100% rename from ruby/lib/interscript/visualize/nodes.rb rename to lib/interscript/visualize/nodes.rb diff --git a/maps b/maps deleted file mode 160000 index 5af40c88..00000000 --- a/maps +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 5af40c881727686d49fae82aca282b65a8053c1e diff --git a/release.sh b/release.sh deleted file mode 100755 index c45a3f6e..00000000 --- a/release.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash -set -euo pipefail - -# We assume this is executed in the main Interscript repository root directory. -# Adjust the V to reflect a correct version -echo -n "Version(2.x.x):" -read V - -# Adjust the B to reflect a correct branch name. For now, master. In the future we may decide on -# how to do stable branches. -B="master" - -# Ensure we are on the latest repository version and all subrepos are up to date as well. -git checkout $B; git pull; git reset -pushd js; git checkout $B; git pull; git reset; popd -pushd maps; git checkout $B; git pull; git reset; popd -# This is the point when you may want to run tests and ensure everything is correct. -# Run the version update script -pushd ruby; bundle exec rake version[$V]; popd -# Add the new version to the submodules, commit it and tag it -pushd js; git add package.json; git commit -m "Release v$V"; git tag "v$V"; popd -pushd maps; git add interscript-maps.gemspec; git commit -m "Release v$V"; git tag "v$V"; popd -# Add the new version and submodules to the main repo, commit it and tag it -git add js maps ruby/lib/interscript/version.rb; git commit -m "Release v$V"; git tag "v$V" - -# Push everything in the correct order -echo "Push js repo..." -pushd js; git push; git push --tags; popd -echo "Push map repo..." 
-pushd maps; git push; git push --tags; popd -echo "Push main repo..." -git push; git push --tags - -# Our new version is released! -echo "Our new version $V is released!" diff --git a/ruby/requirements.txt b/requirements.txt similarity index 100% rename from ruby/requirements.txt rename to requirements.txt diff --git a/ruby/.gitignore b/ruby/.gitignore deleted file mode 100644 index b04a8c84..00000000 --- a/ruby/.gitignore +++ /dev/null @@ -1,11 +0,0 @@ -/.bundle/ -/.yardoc -/_yardoc/ -/coverage/ -/doc/ -/pkg/ -/spec/reports/ -/tmp/ - -# rspec failure tracking -.rspec_status diff --git a/ruby/LICENSE.adoc b/ruby/LICENSE.adoc deleted file mode 100644 index 86b8593b..00000000 --- a/ruby/LICENSE.adoc +++ /dev/null @@ -1,31 +0,0 @@ -= Licenses & Copyright - -This license file adheres to the formatting guidelines of -https://github.com/nevir/readable-licenses[readable-licenses]. - - -== Ribose BSD 2-Clause License - -Copyright (c) 2019-, https://www.ribose.com[Ribose Inc]. -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/ruby/lib/interscript/command.rb b/ruby/lib/interscript/command.rb deleted file mode 100644 index 6457e5de..00000000 --- a/ruby/lib/interscript/command.rb +++ /dev/null @@ -1,28 +0,0 @@ -require 'thor' -require 'interscript' -require 'json' -module Interscript - # Command line interface - class Command < Thor - desc '', 'Transliterate text' - option :system, aliases: '-s', required: true, desc: 'Transliteration system' - option :output, aliases: '-o', required: false, desc: 'Output file' - # Was this option really well thought out? The last parameter is a cache, isn't it? 
- #option :map, aliases: '-m', required: false, default: "{}", desc: 'Transliteration mapping json' - - def translit(input) - if options[:output] - Interscript.transliterate_file(options[:system], input, options[:output]) #, JSON.parse(options[:map])) - else - puts Interscript.transliterate(options[:system], IO.read(input)) - end - end - - desc 'list', 'Prints allowed transliteration systems' - def list - Interscript.maps(load_path: true).each do |path| - puts path - end - end - end -end diff --git a/ruby/spec/authority_codes.yaml b/spec/authority_codes.yaml similarity index 100% rename from ruby/spec/authority_codes.yaml rename to spec/authority_codes.yaml diff --git a/ruby/spec/composability_spec.rb b/spec/composability_spec.rb similarity index 100% rename from ruby/spec/composability_spec.rb rename to spec/composability_spec.rb diff --git a/ruby/spec/detector_spec.rb b/spec/detector_spec.rb similarity index 100% rename from ruby/spec/detector_spec.rb rename to spec/detector_spec.rb diff --git a/ruby/spec/dsl_stage_spec.rb b/spec/dsl_stage_spec.rb similarity index 100% rename from ruby/spec/dsl_stage_spec.rb rename to spec/dsl_stage_spec.rb diff --git a/ruby/spec/interscript_spec.rb b/spec/interscript_spec.rb similarity index 100% rename from ruby/spec/interscript_spec.rb rename to spec/interscript_spec.rb diff --git a/ruby/spec/map_name_and_metadata_spec.rb b/spec/map_name_and_metadata_spec.rb similarity index 100% rename from ruby/spec/map_name_and_metadata_spec.rb rename to spec/map_name_and_metadata_spec.rb diff --git a/ruby/spec/reversibility_spec.rb b/spec/reversibility_spec.rb similarity index 100% rename from ruby/spec/reversibility_spec.rb rename to spec/reversibility_spec.rb diff --git a/ruby/spec/spec_helper.rb b/spec/spec_helper.rb similarity index 87% rename from ruby/spec/spec_helper.rb rename to spec/spec_helper.rb index eabec063..101688ff 100644 --- a/ruby/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -9,6 +9,7 @@ require "interscript" require 
"interscript/compiler/ruby" require "interscript/compiler/javascript" unless ENV["SKIP_JS"] +require "interscript/compiler/python" unless ENV["SKIP_PYTHON"] require "interscript/utils/helpers" RSpec.configure do |config| @@ -29,6 +30,7 @@ def each_compiler &block compilers << Interscript::Interpreter compilers << Interscript::Compiler::Ruby compilers << Interscript::Compiler::Javascript unless ENV["SKIP_JS"] + compilers << Interscript::Compiler::Python unless ENV["SKIP_PYTHON"] compilers.each do |compiler| block.(compiler) diff --git a/ruby/spec/transliterate_each_spec.rb b/spec/transliterate_each_spec.rb similarity index 100% rename from ruby/spec/transliterate_each_spec.rb rename to spec/transliterate_each_spec.rb