diff --git a/.github/workflows/assets.yml b/.github/workflows/assets.yml index 7c0a8e94..39c128e6 100644 --- a/.github/workflows/assets.yml +++ b/.github/workflows/assets.yml @@ -4,6 +4,7 @@ on: push: tags: - 'v*' + workflow_dispatch: jobs: assets: @@ -15,17 +16,25 @@ jobs: - name: Checkout repository uses: actions/checkout@v2 with: - repository: interscript/interscript-bootstrap - - name: Run a bootstrap script + repository: interscript/interscript + + - name: Run bootstrap script run: ruby bootstrap.rb + - name: Use Ruby uses: ruby/setup-ruby@v1 with: - ruby-version: 3.0 + ruby-version: "3.0" bundler-cache: true - working-directory: ./ruby + + - name: Install bundle + working-directory: ./ruby + run: bundle install --jobs 4 --retry 3 --with jsexec --without secryst + - name: Generate visualization json - run: pushd ruby; bundle exec rake generate_visualization_json; popd + working-directory: ./ruby + run: bundle exec rake generate_visualization_json + - name: Archive json files from the previous step uses: thedoctor0/zip-release@master with: @@ -34,6 +43,7 @@ jobs: directory: ./ruby/ exclusions: '*.git*' type: zip + - name: Upload artifacts id: upload_vis_json uses: svenstaro/upload-release-action@2.2.1 @@ -43,8 +53,11 @@ jobs: file_glob: true tag: ${{ github.ref }} overwrite: true + - name: Generate metadata - run: pushd ruby; bundle exec rake generate_metadata_json; popd + working-directory: ./ruby + run: bundle exec rake generate_metadata_json + - name: Archive metadata from the previous step uses: thedoctor0/zip-release@master with: @@ -53,6 +66,7 @@ jobs: directory: ./ruby/ exclusions: '*.git*' type: zip + - name: Upload metadata id: upload_metadata uses: svenstaro/upload-release-action@2.2.1 @@ -62,11 +76,13 @@ jobs: asset_name: metadata.json.zip tag: ${{ github.ref }} overwrite: true + - name: Output link run: | echo ${{ steps.upload_vis_json.outputs.browser_download_url }} echo ${{ steps.upload_metadata.outputs.browser_download_url }} -# - name: Trigger interscript.org + +# - name: Trigger deploy at interscript.org # uses: peter-evans/repository-dispatch@v1 # with: # token: ${{ secrets.INTERSCRIPT_CI_TOKEN }} diff --git a/.github/workflows/rake.yml b/.github/workflows/rake.yml index 3bf0bf48..6a789f3e 100644 --- a/.github/workflows/rake.yml +++ b/.github/workflows/rake.yml @@ -2,7 +2,7 @@ name: rake on: push: - branches: [ master, v* ] + branches: [ main, v*, ci-check ] tags: [ v* ] pull_request: @@ -10,24 +10,12 @@ jobs: rspec: name: Test on Ruby ${{ matrix.ruby }} ${{ matrix.os }} runs-on: ${{ matrix.os }} - continue-on-error: ${{ matrix.experimental }} + continue-on-error: true strategy: fail-fast: false matrix: - ruby: [ 2.7, 2.6, 2.5 ] + ruby: [ 3.3, 3.2, 3.1, "3.0", 2.7, 2.6 ] os: [ ubuntu-latest, windows-latest, macos-latest ] - experimental: [ false ] - include: - - ruby: 3.0 - os: 'ubuntu-latest' - experimental: true - - ruby: 3.0 - os: 'windows-latest' - experimental: true - - ruby: 3.0 - os: 'macos-latest' - experimental: true - env: BUNDLE_WITHOUT: "secryst" SKIP_JS: "1" @@ -36,9 +24,9 @@ jobs: - name: Checkout repository uses: actions/checkout@v2 with: - repository: interscript/interscript-bootstrap + repository: interscript/interscript - - name: Run a bootstrap script + - name: Run bootstrap script run: ruby bootstrap.rb - name: Use Ruby @@ -46,11 +34,10 @@ jobs: with: ruby-version: ${{ matrix.ruby }} bundler-cache: true - working-directory: ./ruby - name: Run RSpecs + working-directory: ./ruby run: | - pushd ruby + pip install regex bundle install --with=jsexec - bundle exec rspec -f f - popd + bundle exec rspec diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 1d8ae909..d8de6a2e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -12,16 +12,14 @@ jobs: - name: Checkout repository uses: actions/checkout@v2 with: - repository: interscript/interscript-bootstrap + repository: interscript/interscript - - name: Run a bootstrap script + - name: Run bootstrap script run: ruby bootstrap.rb - - uses: actions/setup-ruby@v1 + - uses: ruby/setup-ruby@v1 with: - ruby-version: '2.7' - architecture: 'x64' - working-directory: ./ruby + ruby-version: '3.0' - uses: actions/setup-node@v1 with: @@ -29,19 +27,24 @@ jobs: # For now let's install without secryst, as we don't necessarily need it. # We may need to change it once we start to depend on secryst maps. - - run: pushd ruby && bundle install --jobs 4 --retry 3 --with jsexec --without secryst && popd + - name: Install bundle + working-directory: ./ruby + run: bundle install --jobs 4 --retry 3 --with jsexec --without secryst - - name: Test the Ruby package - run: pushd ruby && bundle exec rake && popd + - name: Test Ruby package + working-directory: ./ruby + run: bundle exec rake - - name: Test the JS package - run: pushd js && npm install && npm run prepareMaps && npm test && popd + - name: Test JS package + working-directory: ./js + run: npm install && npm run prepareMaps && npm test - name: Publish to rubygems.org env: RUBYGEMS_API_KEY: ${{secrets.INTERSCRIPT_RUBYGEMS_API_KEY}} run: | gem install gem-release + mkdir -p ~/.gem touch ~/.gem/credentials cat > ~/.gem/credentials << EOF --- diff --git a/Gemfile b/Gemfile index 17c184a8..cf4f70d5 100644 --- a/Gemfile +++ b/Gemfile @@ -26,6 +26,12 @@ unless ENV["SKIP_JS"] end end +unless ENV["SKIP_PYTHON"] + group :pyexec do + gem 'pycall' + end +end + group :rababa do gem 'rababa', "~> 0.1.1" end diff --git a/Rakefile b/Rakefile index 571a1c58..e5ed34b3 100644 --- a/Rakefile +++ b/Rakefile @@ -14,6 +14,9 @@ task :compile, [:compiler, :target] do |t, args| when "javascript" require "interscript/compiler/javascript" [Interscript::Compiler::Javascript, "js"] + when "python" + require "interscript/compiler/python" + [Interscript::Compiler::Python, "py"] end FileUtils.mkdir_p(args[:target]) @@ -34,24 +37,7 @@ task :compile, [:compiler, :target] do |t, args| File.write(args[:target] + "/" + map + "." + ext, code) end - File.write(args[:target] + "/index.json", maplist.to_json) -end - -task :version, [:ver, :part] do |t, args| - ver = args[:ver] - part = args[:part] - - rubyver = File.read(rubyfile = __dir__+"/lib/interscript/version.rb") - jsver = File.read(jsfile = __dir__+"/../js/package.json") - mapsver = File.read(mapsfile = __dir__+"/../maps/interscript-maps.gemspec") - - rubyver = rubyver.gsub(/(VERSION = ")([0-9a-z.-]*)(")/, "\\1#{ver}\\3") - jsver = jsver.gsub(/("version": ")([0-9a-z.-]*)(")/, "\\1#{ver}\\3") - mapsver = mapsver.gsub(/(INTERSCRIPT_MAPS_VERSION=")([0-9a-z.-]*)(")/, "\\1#{ver}\\3") - - File.write(rubyfile, rubyver) if %w[all ruby].include? part - File.write(jsfile, jsver) if %w[all js].include? part - File.write(mapsfile, mapsver) if %w[all maps].include? part + File.write(args[:target] + "/index.json", maplist.to_json) if args[:compiler] == "javascript" end task :generate_visualization_html do diff --git a/bin/set_version b/bin/set_version new file mode 100755 index 00000000..aa99c3d5 --- /dev/null +++ b/bin/set_version @@ -0,0 +1,16 @@ +#!/usr/bin/env ruby +ver = ARGV[0] +part = ARGV[1] + +rubyver = File.read(rubyfile = __dir__+"/../lib/interscript/version.rb") +jsver = File.read(jsfile = __dir__+"/../../js/package.json") +mapsver = File.read(mapsfile = __dir__+"/../../maps/interscript-maps.gemspec") + +rubyver = rubyver.gsub(/(VERSION = ")([0-9a-z.-]*)(")/, "\\1#{ver}\\3") +jsver = jsver.gsub(/("version": ")([0-9a-z.-]*)(")/, "\\1#{ver}\\3") +mapsver = mapsver.gsub(/(INTERSCRIPT_MAPS_VERSION=")([0-9a-z.-]*)(")/, "\\1#{ver}\\3") + +File.write(rubyfile, rubyver) if %w[all ruby].include? part +File.write(jsfile, jsver) if %w[all js].include? part +File.write(mapsfile, mapsver) if %w[all maps].include? part + diff --git a/docs/demo/20191118-interscript-demo-cast.gif b/docs/demo/20191118-interscript-demo-cast.gif new file mode 100644 index 00000000..a2808c47 Binary files /dev/null and b/docs/demo/20191118-interscript-demo-cast.gif differ diff --git a/docs/samples/ara-Arab.txt b/docs/samples/ara-Arab.txt new file mode 100644 index 00000000..4c711c6d --- /dev/null +++ b/docs/samples/ara-Arab.txt @@ -0,0 +1,4 @@ +عندما يريد العالم أن ‪يتكلّم ‬ ، فهو يتحدّث بلغة يونيكود. تسجّل الآن لحضور المؤتمر الدولي العاشر ليونيكود (Unicode Conference)، الذي سيعقد في 10-12 آذار 1997 بمدينة مَايِنْتْس، ألمانيا. و سيجمع المؤتمر بين خبراء من كافة قطاعات الصناعة على الشبكة العالمية انترنيت ويونيكود، حيث ستتم، على الصعيدين الدولي والمحلي على حد سواء مناقشة سبل استخدام يونكود في النظم القائمة وفيما يخص التطبيقات الحاسوبية، الخطوط، تصميم النصوص والحوسبة متعددة اللغات. + +مَمِمّمَّمِّ + diff --git a/docs/samples/chn-Hant.txt b/docs/samples/chn-Hant.txt new file mode 100644 index 00000000..bc042b44 --- /dev/null +++ b/docs/samples/chn-Hant.txt @@ -0,0 +1,2 @@ +当世界需要沟通时,请用Unicode!将于1997年3月10日-12日在德国 Mainz 市举行的第十届统一码国际研讨会现在开始注册。本次会议将汇集各方面的专家。涉及的领域包括:国际互联网和统一码,国际化和本地化,统一码在操作系统和应用软件中的实现,字型,文本格式以及多文种计算等。 + diff --git a/docs/samples/devanagari.txt b/docs/samples/devanagari.txt new file mode 100644 index 00000000..cb427128 --- /dev/null +++ b/docs/samples/devanagari.txt @@ -0,0 +1,2 @@ +हालाँकि सूर के जीवन के बारे में कई जनश्रुतियाँ प्रचलित हैं, पर इन में कितनी सच्चाई है यह कहना कठिन है। कहा जाता है उनका जन्म सन् १४७८ में दिल्ली के पास एक ग़रीब ब्राह्मीण परिवार में हुआ। जनश्रुति के अनुसार सूरदास जन्म से ही अंधे थे। आजकल थी अंधे आदमी अक्सर 'सूरदास' कहलाते हैं। कई लोगों ने उन्हें गुरु के रूप में अपनाया और उनकी पूजा करना शुरु कर दिया । + diff --git a/docs/samples/gre-Grek.txt b/docs/samples/gre-Grek.txt new file mode 100644 index 00000000..13425d79 --- /dev/null +++ b/docs/samples/gre-Grek.txt @@ -0,0 +1,2 @@ +Όταν ο κόσμος θέλει να επικοινωνήσει, μιλά Unicode. Εγγραφείτε τώρα στη Δέκατη Διεθνή Διάσκεψη για το Unicode, η οποία θα πραγματοποιηθεί 10-12 Μαρτίου 1997 στο Mainz της Γερμανίας. Η διάσκεψη θα φέρει κοντά ειδικούς από όλους τους τομείς Internet και Unicode, διεθνοποίησης και προσαρμογής λογισμικού, εφαρμογών του Unicode σε λειτουργικά συστήματα και εφαρμογές, γραμματοσειρές, διάρθρωση κειμένου και πολυγλωσσική εργασία σε υπολογιστές. + diff --git a/docs/samples/hangul.txt b/docs/samples/hangul.txt new file mode 100644 index 00000000..29eb6ea2 --- /dev/null +++ b/docs/samples/hangul.txt @@ -0,0 +1,2 @@ +세계를 향한 대화, 유니코드로 하십시오. 제10회 유니코드 국제 회의가 1997년 3월 10일부터 12일까지 독일의 마인즈에서 열립니다. 지금 등록하십시오. 이 회의에서는 업계 전반의 전문가들이 함께 모여 다음과 같은 분야를 다룹니다. - 인터넷과 유니코드, 국제화와 지역화, 운영 체제와 응용 프로그램에서 유니코드의 구현, 글꼴, 문자 배열, 다국어 컴퓨팅. + diff --git a/docs/samples/heb-Hebr.txt b/docs/samples/heb-Hebr.txt new file mode 100644 index 00000000..cd4a8699 --- /dev/null +++ b/docs/samples/heb-Hebr.txt @@ -0,0 +1,2 @@ +כאשר העולם רוצה לדבר, הוא מדבר ב־Unicode. הירשמו כעת לכנס Unicode הבינלאומי העשירי, שייערך בין התאריכים 12־10 במרץ 1997, בְּמָיְינְץ שבגרמניה. בכנס ישתתפו מומחים מכל ענפי התעשייה בנושא האינטרנט העולמי וה־Unicode, בהתאמה לשוק הבינלאומי והמקומי, ביישום Unicode במערכות הפעלה וביישומים, בגופנים, בפריסת טקסט ובמחשוב רב־לשוני. + diff --git a/docs/samples/hungarian.txt b/docs/samples/hungarian.txt new file mode 100644 index 00000000..2795971c --- /dev/null +++ b/docs/samples/hungarian.txt @@ -0,0 +1,2 @@ +Ha a világ beszélni akarna, Unicode-ul szólalna meg. Regisztráljon már most a Tizedik Nemzetközi Unicode Konferenciára, melyet 1997. március 10-12-én rendeznek Meinz-ban, Németországban. Ezen a konferencián az iparág több neves szakértője is résztvesz. Ízelítőül a témákból: a világháló és a Unicode nemzetközisítése és lokalizálása, a Unicode alkalmazása működő rendszerekben és alkalmazásokban, szövegelrendezésnél, és többnyelvű számítógépeken. + diff --git a/docs/samples/japanese.txt b/docs/samples/japanese.txt new file mode 100644 index 00000000..ce2dfb70 --- /dev/null +++ b/docs/samples/japanese.txt @@ -0,0 +1,2 @@ +世界的に話すなら、Unicodeです。第10回のUnicode会議は1997年3月10~12日、ドイツのマインツで開かれます。 参 さん 加 か 希 き 望 ぼう の方は今すぐ登録してください。(この会議では、グローバルなインタネット、Unicode、ソフトウェアの国際化およびローカリゼーション、OSおよびアプリケーションでのUnicodeのインプリメンテーション、フォント、テキスト表示、マルチ言語コンピューティングにおける業界の専門家が集まります。) + diff --git a/docs/samples/rus-Cyrl-1.txt b/docs/samples/rus-Cyrl-1.txt new file mode 100644 index 00000000..a3eb84fc --- /dev/null +++ b/docs/samples/rus-Cyrl-1.txt @@ -0,0 +1,54 @@ +Выборы депутатов Государственной Думы Федерального Собрания Российской Федерации седьмого созыва +Дата голосования: 18.09.2016 + +Наименование Избирательной комиссии: ЦИК России + +Сводная таблица результатов выборов по федеральному избирательному округу + + + + +1 Число избирателей, внесенных в список избирателей на момент окончания голосования +2 Число избирательных бюллетеней, полученных участковой избирательной комиссией +3 Число избирательных бюллетеней, выданных избирателям, проголосовавшим досрочно +4 Число избирательных бюллетеней, выданных в помещении для голосования в день голосования +5 Число избирательных бюллетеней, выданных вне помещения для голосования в день голосования +6 Число погашенных избирательных бюллетеней +7 Число избирательных бюллетеней, содержащихся в переносных ящиках для голосования +8 Число избирательных бюллетеней, содержащихся в стационарных ящиках для голосования +9 Число недействительных избирательных бюллетеней +10 Число действительных избирательных бюллетеней +11 Число открепительных удостоверений, полученных участковой избирательной комиссией +12 Число открепительных удостоверений, выданных на избирательном участке до дня голосования +13 Число избирателей, проголосовавших по открепительным удостоверениям на избирательном участке +14 Число погашенных неиспользованных открепительных удостоверений +15 Число открепительных удостоверений, выданных избирателям территориальной избирательной комиссией +16 Число утраченных открепительных удостоверений +17 Число утраченных избирательных бюллетеней +18 Число избирательных бюллетеней, не учтенных при получении + +19 1. ВСЕРОССИЙСКАЯ ПОЛИТИЧЕСКАЯ ПАРТИЯ "РОДИНА" +20 2. Политическая партия КОММУНИСТИЧЕСКАЯ ПАРТИЯ КОММУНИСТЫ РОССИИ +21 3. Политическая партия "Российская партия пенсионеров за справедливость" +22 4. Всероссийская политическая партия "ЕДИНАЯ РОССИЯ" +23 5. Политическая партия "Российская экологическая партия "Зеленые" +24 6. Политическая партия "Гражданская Платформа" +25 7. Политическая партия ЛДПР - Либерально-демократическая партия России +26 8. Политическая партия "Партия народной свободы" (ПАРНАС) +27 9. Всероссийская политическая партия "ПАРТИЯ РОСТА" +28 10. Общественная организация Всероссийская политическая партия "Гражданская Сила" +29 11. Политическая партия "Российская объединенная демократическая партия "ЯБЛОКО" +30 12. Политическая партия "КОММУНИСТИЧЕСКАЯ ПАРТИЯ РОССИЙСКОЙ ФЕДЕРАЦИИ" +31 13. Политическая партия "ПАТРИОТЫ РОССИИ" +32 14. Политическая партия СПРАВЕДЛИВАЯ РОССИЯ + + + +Данные окружных избирательных комиссий о числе открепительных удостоверений + + +д Число открепительных удостоверений, полученных окружной избирательной комиссией +е Число открепительных удостоверений, выданных территориальным избирательным комиссиям +ж Число неиспользованных открепительных удостоверений, погашенных окружной избирательной комиссией +з Число открепительных удостоверений, утраченных в окружной избирательной комиссии + diff --git a/docs/samples/rus-Cyrl-2.txt b/docs/samples/rus-Cyrl-2.txt new file mode 100644 index 00000000..fcfcdae2 --- /dev/null +++ b/docs/samples/rus-Cyrl-2.txt @@ -0,0 +1,38 @@ +Дополнительные выборы депутатов Муниципального Совета городского поселения Ростов четвертого созыва по четырехмандатному избирательному округу №1 на замещение двух депутатских мандатов +Дата голосования: 09.09.2018 + +Наименование Избирательной комиссии: Ростовский городской + +Результаты выборов по одномандатному (многомандатному) округу + + + + +Дата и время подписания протокола 10.09.2018 06:20:00 + + +1 Число избирателей, включенных в список избирателей на момент окончания голосования +2 Число бюллетеней, полученных участковой комиссией +3 Число бюллетеней, выданных избирателям, проголосовавшим досрочно +4 В том числе бюллетеней, выданных избирателям, проголосовавшим досрочно в помещении ТИК +5 Число бюллетеней, выданных избирателям в помещении для голосования +6 Число бюллетеней, выданных избирателям, проголосовавшим вне помещения для голосования +7 Число погашенных бюллетеней +8 Число бюллетеней, содержащихся в переносных ящиках +9 Число бюллетеней, содержащихся в стационарных ящиках +10 Число недействительных бюллетеней +11 Число действительных бюллетеней +11а Число утраченных бюллетеней +11б Число бюллетеней, не учтенных при получении + +12 Гаврилов Александр Витальевич +13 Жаринов Владимир Иванович +14 Иванов Кирилл Алексеевич +15 Иконников Дмитрий Александрович +16 Кичков Сергей Николаевич +17 Кошонин Алексей Александрович +18 Михайловский Анатолий Ярославович +19 Морсунин Сергей Владимирович +20 Невзоров Олег Витальевич +21 Полозов Игорь Николаевич +22 Сахаров Александр Александрович \ No newline at end of file diff --git a/docs/samples/rus-Cyrl.txt b/docs/samples/rus-Cyrl.txt new file mode 100644 index 00000000..456d5f54 --- /dev/null +++ b/docs/samples/rus-Cyrl.txt @@ -0,0 +1,2 @@ +Эх, тройка! птица тройка, кто тебя выдумал? знать, у бойкого народа ты могла только родиться, в той земле, что не любит шутить, а ровнем-гладнем разметнулась на полсвета, да и ступай считать версты, пока не зарябит тебе в очи. И не хитрый, кажись, дорожный снаряд, не железным схвачен винтом, а наскоро живьём с одним топором да долотом снарядил и собрал тебя ярославский расторопный мужик. Не в немецких ботфортах ямщик: борода да рукавицы, и сидит чёрт знает на чём; а привстал, да замахнулся, да затянул песню — кони вихрем, спицы в колесах смешались в один гладкий круг, только дрогнула дорога, да вскрикнул в испуге остановившийся пешеход — и вон она понеслась, понеслась, понеслась! +Н.В. Гоголь diff --git a/docs/samples/thai.txt b/docs/samples/thai.txt new file mode 100644 index 00000000..cd37984b --- /dev/null +++ b/docs/samples/thai.txt @@ -0,0 +1,2 @@ +ณ ยามที่โลกต้องการเอ่ยถ้อยคำใดๆ โลกจะใช้เพียง Unicode เราจึงขอเชิญชวนท่านรีบลงทะเบียนงาน International Unicode Conference ครั้งที่ 10 ซึ่งจะจัดให้มีขึ้น ณ เมือง Mainz ประเทศเยอรมัน ในระหว่างวันที่ 10-12 มีนาคม ค.ศ. 1997 เสียแต่บัดนี้ โดยในงานประชุมดังกล่าว ท่านจะมีโอกาสได้พบกับบรรดาผู้เชี่ยวชาญจากธุรกิจอินเตอร์เน็ตและ Unicode ธุรกิจ Internationalization และ Localization จากทุกมุมทั่วโลก พร้อมรับทราบการใช้ประโยชน์จาก Unicode ร่วมกับระบบปฏิบัติการและโปรแกรมต่างๆ ฟอนต์ รูปแบบข้อความ รวมทั้งวิทยาการด้านคอมพิวเตอร์ในภาษาต่างๆ + diff --git a/docs/utils/regexp_examples.txt b/docs/utils/regexp_examples.txt new file mode 100644 index 00000000..8313a2fd --- /dev/null +++ b/docs/utils/regexp_examples.txt @@ -0,0 +1,16 @@ +(?<=[ΑαΕεΟοΗηΩω])\\u03CD +(?![ΑαΕεΟοΗηΩω])\\u03A5 +(?<=\\b)\\u03BC\\u03C0 +(ह़=?)(?=[\\u093E\\u093F\\u0940\\u0941\\u0942\\u0943\\u0944\\u0945]) +[\u064a|\u06cc]\u0651 +(?<=\\b\u0627\u0644[\u0600-\u06ff])\u0629 +\u0629$ +^b +(?<=)\u042a(?=\b) +(([क]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([क])(?=\b)) +a\1b +(?<=\b\u0627\u0644[\u0600-\u06ff]{2})\u0629 +(?<=[A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])(-?)ᄀ +(?<=[ЗзЛлНнСсЦц])\u044C +\\1\u0301 +\u0650[\u064a|\u06cc] \ No newline at end of file diff --git a/lib/interscript.rb b/lib/interscript.rb index 90c859f4..21c9c2fc 100644 --- a/lib/interscript.rb +++ b/lib/interscript.rb @@ -2,7 +2,14 @@ require "yaml" module Interscript + # An error caused by a lack of some map class MapNotFoundError < StandardError; end + # An error caused by a missing dependency + class ExternalUtilError < StandardError; end + # An error caused by a particular compiler + class SystemConversionError < StandardError; end + # An error caused by an incorrect map implementation + class MapLogicError < StandardError; end class << self def load_path @@ -41,9 +48,9 @@ def transliterate_each(system_code, string, maps={}, &block) load(system_code, maps).(string, each: true, &block) end - def transliterate_file(system_code, input_file, output_file, maps={}) + def transliterate_file(system_code, input_file, output_file, maps={}, compiler: Interscript::Interpreter) input = File.read(input_file) - output = transliterate(system_code, input, maps) + output = transliterate(system_code, input, maps, compiler: compiler) File.open(output_file, 'w') do |f| f.puts(output) @@ -118,11 +125,12 @@ def rababa_provision(model_name, model_uri) ([ENV["RABABA_DATA"]] + possible_paths).compact.each do |path| FileUtils.mkdir_p(path) - write_path = path unless write_path + write_path = path + break rescue end - - raise StandardError, "Can't find a writable path for Rababa. Consider setting a RABABA_DATA environment variable" unless write_path + + raise ExternalUtilError, "Can't find a writable path for Rababa. Consider setting a RABABA_DATA environment variable" unless write_path model_path = "#{write_path}/model-#{model_name}.onnx" @@ -130,8 +138,8 @@ def rababa_provision(model_name, model_uri) if File.exist?(model_path) && File.mtime(model_path) + 3600 >= Time.now return model_path else - data = URI.open(model_uri).read - File.write(model_path, data) + data = URI.open(model_uri, encoding: "BINARY").read + File.binwrite(model_path, data) return model_path end end diff --git a/lib/interscript/command.rb b/lib/interscript/command.rb index 6457e5de..aa017e27 100644 --- a/lib/interscript/command.rb +++ b/lib/interscript/command.rb @@ -1,20 +1,30 @@ require 'thor' require 'interscript' require 'json' + module Interscript # Command line interface class Command < Thor desc '', 'Transliterate text' option :system, aliases: '-s', required: true, desc: 'Transliteration system' option :output, aliases: '-o', required: false, desc: 'Output file' + option :compiler, aliases: '-c', required: false, desc: 'Compiler (eg. Interscript::Compiler::Python)' # Was this option really well thought out? The last parameter is a cache, isn't it? #option :map, aliases: '-m', required: false, default: "{}", desc: 'Transliteration mapping json' def translit(input) + compiler = if options[:compiler] + compiler = options[:compiler].split("::").last.downcase + require "interscript/compiler/#{compiler}" + Object.const_get(options[:compiler]) + else + Interscript::Interpreter + end + if options[:output] - Interscript.transliterate_file(options[:system], input, options[:output]) #, JSON.parse(options[:map])) + Interscript.transliterate_file(options[:system], input, options[:output], compiler: compiler) else - puts Interscript.transliterate(options[:system], IO.read(input)) + puts Interscript.transliterate(options[:system], IO.read(input), compiler: compiler) end end @@ -24,5 +34,46 @@ def list puts path end end + + desc 'stats', 'Prints statistics about the maps we have' + def stats + maps = Interscript.maps(load_path: true) + parsed_maps = maps.map { |i| [i, Interscript.parse(i)] }.to_h + maps_by_rule_count = parsed_maps.transform_values do |map| + map.stages.values.map { |i| i.children.map { |j| j.is_a?(Interscript::Node::Group) ? j.children : j } }.flatten.count + end + + authorities, languages, source_scripts, target_scripts = 4.times.map do |i| + maps.group_by { |map| map.split('-')[i] } + end + + puts <<~END + Languages supported: #{languages.keys.count} + Source scripts supported: #{source_scripts.keys.count} + Target scripts supported: #{target_scripts.keys.count} + Authorities supported: #{authorities.keys.count} + Total number of rules in Interscript: #{maps_by_rule_count.values.sum} + + END + + authorities.each do |auth, auth_maps| + rule_counts = auth_maps.map { |i| maps_by_rule_count[i] } + puts <<~END + Authority #{auth}: + * Conversion systems: #{auth_maps.count} + * Total number of rules: #{rule_counts.sum} + + END + end + + puts <<~END + Interesting facts: + * #{maps_by_rule_count.max_by { |i| i.last }.first} has the most rules + * Authority #{authorities.max_by { |i| i.last.count }.first} has the most systems + * Language #{languages.max_by { |i| i.last.count }.first} has the most systems + * Source script #{source_scripts.max_by { |i| i.last.count }.first} has the most systems + * Target script #{target_scripts.max_by { |i| i.last.count }.first} has the most systems + END + end end end diff --git a/lib/interscript/compiler/javascript.rb b/lib/interscript/compiler/javascript.rb index 4b2ab338..07a79ed4 100644 --- a/lib/interscript/compiler/javascript.rb +++ b/lib/interscript/compiler/javascript.rb @@ -70,11 +70,11 @@ def compile_rule(r, map = @map, wrapper = false) # Try to build a tree a = [] r.children.each do |i| - raise ArgumentError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i - raise ArgumentError, "Can't parallelize rules with :before" if i.before - raise ArgumentError, "Can't parallelize rules with :after" if i.after - raise ArgumentError, "Can't parallelize rules with :not_before" if i.not_before - raise ArgumentError, "Can't parallelize rules with :not_after" if i.not_after + raise Interscript::SystemConversionError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i + raise Interscript::SystemConversionError, "Can't parallelize rules with :before" if i.before + raise Interscript::SystemConversionError, "Can't parallelize rules with :after" if i.after + raise Interscript::SystemConversionError, "Can't parallelize rules with :not_before" if i.not_before + raise Interscript::SystemConversionError, "Can't parallelize rules with :not_after" if i.not_after next if i.reverse_run == true a << [compile_item(i.from, map, :par), compile_item(i.to, map, :parstr)] @@ -89,7 +89,7 @@ def compile_rule(r, map = @map, wrapper = false) # Otherwise let's build a megaregexp a = [] Interscript::Stdlib.deterministic_sort_by_max_length(r.children).each do |i| - raise ArgumentError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i + raise Interscript::SystemConversionError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i next if i.reverse_run == true a << [build_regexp(i, map), compile_item(i.to, map, :parstr)] @@ -122,7 +122,7 @@ def compile_rule(r, map = @map, wrapper = false) end c += "s = Interscript.transliterate(#{stage.doc_name.to_json}, s, #{stage.name.to_json});\n" else - raise ArgumentError, "Can't compile unhandled #{r.class}" + raise Interscript::SystemConversionError, "Can't compile unhandled #{r.class}" end c end @@ -157,17 +157,17 @@ def compile_item i, doc=@map, target=nil astr = if i.map d = doc.dep_aliases[i.map].document a = d.imported_aliases[i.name] - raise ArgumentError, "Alias #{i.name} of #{i.stage.map} not found" unless a + raise Interscript::SystemConversionError, "Alias #{i.name} of #{i.stage.map} not found" unless a "Interscript.get_alias_ALIASTYPE(#{a.doc_name.to_json}, #{a.name.to_json})" elsif Interscript::Stdlib::ALIASES.include?(i.name) if target != :re && Interscript::Stdlib.re_only_alias?(i.name) - raise ArgumentError, "Can't use #{i.name} in a #{target} context" + raise Interscript::SystemConversionError, "Can't use #{i.name} in a #{target} context" end stdlib_alias = true "Interscript.aliases.#{i.name}" else a = doc.imported_aliases[i.name] - raise ArgumentError, "Alias #{i.name} not found" unless a + raise Interscript::SystemConversionError, "Alias #{i.name} not found" unless a "Interscript.get_alias_ALIASTYPE(#{a.doc_name.to_json}, #{a.name.to_json})" end @@ -205,7 +205,7 @@ def compile_item i, doc=@map, target=nil end when Interscript::Node::Item::CaptureGroup if target != :re - raise ArgumentError, "Can't use a CaptureGroup in a #{target} context" + raise Interscript::SystemConversionError, "Can't use a CaptureGroup in a #{target} context" end "(" + compile_item(i.data, doc, target) + ")" when Interscript::Node::Item::Maybe, @@ -217,7 +217,7 @@ def compile_item i, doc=@map, target=nil Interscript::Node::Item::MaybeSome => "*" }[i.class] if target == :par - raise ArgumentError, "Can't use a MaybeSome in a #{target} context" + raise Interscript::SystemConversionError, "Can't use a MaybeSome in a #{target} context" end if Interscript::Node::Item::String === i.data && i.data.data.length != 1 "(?:" + compile_item(i.data, doc, target) + ")" + resuffix @@ -226,7 +226,7 @@ def compile_item i, doc=@map, target=nil end when Interscript::Node::Item::CaptureRef if target == :par - raise ArgumentError, "Can't use CaptureRef in parallel mode" + raise Interscript::SystemConversionError, "Can't use CaptureRef in parallel mode" elsif target == :re "\\\\#{i.id}" elsif target == :str @@ -234,7 +234,7 @@ def compile_item i, doc=@map, target=nil end when Interscript::Node::Item::Any if target == :str - raise ArgumentError, "Can't use Any in a string context" # A linter could find this! + raise Interscript::SystemConversionError, "Can't use Any in a string context" # A linter could find this! elsif target == :par i.data.map(&:data) elsif target == :re diff --git a/lib/interscript/compiler/python.rb b/lib/interscript/compiler/python.rb new file mode 100644 index 00000000..13722091 --- /dev/null +++ b/lib/interscript/compiler/python.rb @@ -0,0 +1,331 @@ +require 'pycall' + +class Interscript::Compiler::Python < Interscript::Compiler + def escape(val) + case val + when String, Integer + val.inspect + when Symbol + val.to_s.inspect + when Hash + "{"+ + val.map { |k,v| "#{escape k}:#{escape v}" }.join(",")+ + "}" + when Array + "[" + val.map { |i| escape i }.join(",") + "]" + when nil + "None" + else + pp [:error, val] + exit! + end + end + + def re_escape(val) + @pycall_regex ||= PyCall.import_module("regex") + @pycall_regex.escape(val).gsub("\\", "\\\\\\\\").gsub('"', "\\\\\"") + end + + def new_regexp(str) + "re.compile(\"#{str}\", re.MULTILINE)" + end + + def indent + @indent += 4 + yield + @indent -= 4 + end + + def emit(code) + @code << (" " * @indent) << code << "\n" + code + end + + def compile(map, debug: false) + @indent = 0 + @map = map + @debug = debug + @parallel_trees = {} + @parallel_regexps = {} + @code = "" + emit "import interscript" + emit "import regex as re" + map.dependencies.map(&:full_name).each do |dep| + emit "interscript.load_map(#{escape dep})" + end + + emit "interscript.stdlib.define_map(#{escape map.name})" + + map.aliases.each do |name, value| + val = compile_item(value.data, map, :str) + emit "interscript.stdlib.add_map_alias(#{escape map.name}, #{escape name}, #{val})" + val = "\"" + compile_item(value.data, map, :re) + "\"" + emit "interscript.stdlib.add_map_alias_re(#{escape map.name}, #{escape name}, #{val})" + end + + map.stages.each do |_, stage| + compile_rule(stage, @map, true) + end + @parallel_trees.each do |k,v| + emit "_PTREE_#{k} = #{escape v}" + end + @parallel_regexps.each do |k,v| + v = %{["#{v[0]}", #{escape v[1]}]} + emit "_PRE_#{k} = #{v}" + end + end + + def parallel_regexp_compile(subs_hash) + # puts subs_hash.inspect + regexp = subs_hash.each_with_index.map do |p,i| + "(?P<_%d>%s)" % [i,p[0]] + end.join("|") + subs_regexp = regexp + # puts subs_regexp.inspect + end + + def compile_rule(r, map = @map, wrapper = false) + return if r.reverse_run == true + case r + when Interscript::Node::Stage + if @debug + emit "if not hasattr(interscript, 'map_debug'):" + indent { emit "interscript.map_debug = []" } + end + emit "def _stage_#{r.name}(s):" + indent do + r.children.each do |t| + comp = compile_rule(t, map) + emit %{interscript.map_debug.append([s, #{escape @map.name.to_s}, #{escape r.name.to_s}, #{escape t.inspect}, #{escape comp}])} if @debug + end + emit "return s\n" + end + emit "interscript.stdlib.add_map_stage(#{escape @map.name}, #{escape r.name}, _stage_#{r.name})" + when Interscript::Node::Group::Parallel + begin + # Try to build a tree + a = [] + r.children.each do |i| + raise Interscript::SystemConversionError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i + raise Interscript::SystemConversionError, "Can't parallelize rules with :before" if i.before + raise Interscript::SystemConversionError, "Can't parallelize rules with :after" if i.after + raise Interscript::SystemConversionError, "Can't parallelize rules with :not_before" if i.not_before + raise Interscript::SystemConversionError, "Can't parallelize rules with :not_after" if i.not_after + + next if i.reverse_run == true + a << [compile_item(i.from, map, :par), compile_item(i.to, map, :parstr)] + end + ah = a.hash.abs + unless @parallel_trees.include? ah + tree = Interscript::Stdlib.parallel_replace_compile_tree(a) + @parallel_trees[ah] = tree + end + emit "s = interscript.stdlib.parallel_replace_tree(s, _PTREE_#{ah})" + rescue + # Otherwise let's build a megaregexp + a = [] + Interscript::Stdlib.deterministic_sort_by_max_length(r.children).each do |i| + raise Interscript::SystemConversionError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i + + next if i.reverse_run == true + a << [build_regexp(i, map), compile_item(i.to, map, :parstr)] + end + ah = a.hash.abs + unless @parallel_regexps.include? ah + re = parallel_regexp_compile(a) + @parallel_regexps[ah] = [re, a.map(&:last)] + end + emit "s = interscript.stdlib.parallel_regexp_gsub(s, *_PRE_#{ah})" + end + when Interscript::Node::Rule::Sub + from = new_regexp build_regexp(r, map) + if r.to == :upcase + to = 'interscript.stdlib.upper' + elsif r.to == :downcase + to = 'interscript.stdlib.lower' + else + to = compile_item(r.to, map, :str) + end + emit "s = #{from}.sub(#{to}, s)" + when Interscript::Node::Rule::Funcall + emit "s = interscript.functions.#{r.name}(s, #{escape r.kwargs})" + when Interscript::Node::Rule::Run + if r.stage.map + doc = map.dep_aliases[r.stage.map].document + stage = doc.imported_stages[r.stage.name] + else + stage = map.imported_stages[r.stage.name] + end + emit "s = interscript.transliterate(#{escape stage.doc_name}, s, #{escape stage.name})" + else + raise Interscript::SystemConversionError, "Can't compile unhandled #{r.class}" + end + end + + def build_regexp(r, map=@map) + from = compile_item(r.from, map, :re) + before = compile_item(r.before, map, :re) if r.before + after = compile_item(r.after, map, :re) if r.after + not_before = compile_item(r.not_before, map, :re) if r.not_before + not_after = compile_item(r.not_after, map, :re) if r.not_after + + re = "" + re += "(?<=#{before})" if before + re += "(? "?" , + Interscript::Node::Item::Some => "+" , + Interscript::Node::Item::MaybeSome => "*" }[i.class] + + if target == :par + raise Interscript::SystemConversionError, "Can't use a Maybe in a #{target} context" + end + if Interscript::Node::Item::String === i.data && i.data.data.length != 1 + "(?:" + compile_item(i.data, doc, target) + ")" + resuffix + else + compile_item(i.data, doc, target) + resuffix + end + when Interscript::Node::Item::CaptureRef + if target == :par + raise Interscript::SystemConversionError, "Can't use CaptureRef in parallel mode" + elsif target == :re + "\\\\#{i.id}" + elsif target == :str + "\"\\\\#{i.id}\"" + end + when Interscript::Node::Item::Any + if target == :str + raise Interscript::SystemConversionError, "Can't use Any in a string context" # A linter could find this! + elsif target == :par + i.data.map(&:data) + elsif target == :re + case i.value + when Array + data = i.data.map { |j| compile_item(j, doc, target) } + "(?:"+data.join("|")+")" + when String + "[#{re_escape(i.value)}]" + when Range + "[#{re_escape(i.value.first)}-#{re_escape(i.value.last)}]" + end + end + end + end + + @maps_loaded = {} + @ctx = nil + class << self + attr_accessor :maps_loaded + attr_accessor :ctx + end + + def load + if !self.class.maps_loaded[@map.name] + @map.dependencies.each do |dep| + dep = dep.full_name + if !self.class.maps_loaded[dep] + Interscript.load(dep, compiler: self.class).load + end + end + + ctx = self.class.ctx + python_src_path = File.join(__dir__, '..', '..', '..', '..', 'python', 'src') + unless ctx + PyCall.sys.path.append(python_src_path) + self.class.ctx = PyCall.import_module("interscript") + end + #puts @code + Dir.mkdir("#{python_src_path}/interscript/maps") rescue nil + File.write("#{python_src_path}/interscript/maps/#{@map.name}.py", @code) + self.class.ctx.load_map(@map.name) + + self.class.maps_loaded[@map.name] = true + end + end + + def call(str, stage=:main) + load + self.class.ctx.transliterate(@map.name, str, stage.to_s) + end + + def self.read_debug_data + (ctx['map_debug'] || []).map(&:to_a).to_a + end + + def self.reset_debug_data + ctx['map_debug'].clear + end +end diff --git a/lib/interscript/compiler/ruby.rb b/lib/interscript/compiler/ruby.rb index 78788ad3..dcdefcdf 100644 --- a/lib/interscript/compiler/ruby.rb +++ b/lib/interscript/compiler/ruby.rb @@ -60,11 +60,11 @@ def compile_rule(r, map = @map, wrapper = false) # Try to build a tree a = [] r.children.each do |i| - raise ArgumentError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i - raise ArgumentError, "Can't parallelize rules with :before" if i.before - raise ArgumentError, "Can't parallelize rules with :after" if i.after - raise ArgumentError, "Can't parallelize rules with :not_before" if i.not_before - raise ArgumentError, "Can't parallelize rules with :not_after" if i.not_after + raise Interscript::SystemConversionError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i + raise Interscript::SystemConversionError, "Can't parallelize rules with :before" if i.before + raise Interscript::SystemConversionError, "Can't parallelize rules with :after" if i.after + raise Interscript::SystemConversionError, "Can't parallelize rules with :not_before" if i.not_before + raise Interscript::SystemConversionError, "Can't parallelize rules with :not_after" if i.not_after next if i.reverse_run == true a << [compile_item(i.from, map, :par), compile_item(i.to, map, :parstr)] @@ -79,7 +79,7 @@ def compile_rule(r, map = @map, wrapper = false) # Otherwise let's build a megaregexp a = [] Interscript::Stdlib.deterministic_sort_by_max_length(r.children).each do |i| - raise ArgumentError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i + raise Interscript::SystemConversionError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i next if i.reverse_run == true a << [build_regexp(i, map), compile_item(i.to, map, :parstr)] @@ -112,7 +112,7 @@ def compile_rule(r, map = @map, wrapper = false) end c += "s = Interscript::Maps.transliterate(#{stage.doc_name.inspect}, s, #{stage.name.inspect})\n" else - raise ArgumentError, "Can't compile unhandled #{r.class}" + raise Interscript::SystemConversionError, "Can't compile unhandled #{r.class}" end c end @@ -146,17 +146,17 @@ def compile_item i, doc=@map, target=nil astr = if i.map d = doc.dep_aliases[i.map].document a = d.imported_aliases[i.name] - raise ArgumentError, "Alias #{i.name} of #{i.stage.map} not found" unless a + raise Interscript::SystemConversionError, "Alias #{i.name} of #{i.stage.map} not found" unless a "Interscript::Maps.get_alias_ALIASTYPE(#{a.doc_name.inspect}, #{a.name.inspect})" elsif Interscript::Stdlib::ALIASES.include?(i.name) if target != :re && Interscript::Stdlib.re_only_alias?(i.name) - raise ArgumentError, "Can't use #{i.name} in a #{target} context" + raise Interscript::SystemConversionError, "Can't use #{i.name} in a #{target} context" end stdlib_alias = true "Interscript::Stdlib::ALIASES[#{i.name.inspect}]" else a = doc.imported_aliases[i.name] - raise ArgumentError, "Alias #{i.name} not found" unless a + raise Interscript::SystemConversionError, "Alias #{i.name} not found" unless a "Interscript::Maps.get_alias_ALIASTYPE(#{a.doc_name.inspect}, #{a.name.inspect})" end @@ -194,7 +194,7 @@ def compile_item i, doc=@map, target=nil end when Interscript::Node::Item::CaptureGroup if target != :re - raise ArgumentError, "Can't use a CaptureGroup in a #{target} context" + raise Interscript::SystemConversionError, "Can't use a CaptureGroup in a #{target} context" end "(" + compile_item(i.data, doc, target) + ")" when Interscript::Node::Item::Maybe, @@ -206,7 +206,7 @@ def compile_item i, doc=@map, target=nil Interscript::Node::Item::MaybeSome => "*" }[i.class] if target == :par - raise ArgumentError, "Can't use a Maybe in a #{target} context" + raise Interscript::SystemConversionError, "Can't use a Maybe in a #{target} context" end if Interscript::Node::Item::String === i.data && i.data.data.length != 1 "(?:" + compile_item(i.data, doc, target) + ")" + resuffix @@ -215,7 +215,7 @@ def compile_item i, doc=@map, target=nil end when Interscript::Node::Item::CaptureRef if target == :par - raise ArgumentError, "Can't use CaptureRef in parallel mode" + raise Interscript::SystemConversionError, "Can't use CaptureRef in parallel mode" elsif target == :re "\\#{i.id}" elsif target == :str @@ -223,7 +223,7 @@ def compile_item i, doc=@map, target=nil end when Interscript::Node::Item::Any if target == :str - raise ArgumentError, "Can't use Any in a string context" # A linter could find this! + raise Interscript::SystemConversionError, "Can't use Any in a string context" # A linter could find this! elsif target == :par i.data.map(&:data) elsif target == :re diff --git a/lib/interscript/dsl.rb b/lib/interscript/dsl.rb index f25a0090..60301454 100644 --- a/lib/interscript/dsl.rb +++ b/lib/interscript/dsl.rb @@ -67,7 +67,7 @@ def self.parse(map_name, reverse: true) ruby << l end end - raise ArgumentError, "metadata stage isn't terminated" if md_reading + raise Interscript::MapLogicError, "metadata stage isn't terminated" if md_reading ruby, yaml = ruby.join("\n"), yaml.join("\n") obj = Interscript::DSL::Document.new(map_name) @@ -76,7 +76,12 @@ def self.parse(map_name, reverse: true) yaml = if yaml =~ /\A\s*\z/ {} else - YAML.load(yaml, exc_fname) + unsafe_load = if YAML.respond_to? :unsafe_load + :unsafe_load + else + :load + end + YAML.public_send(unsafe_load, yaml, filename: exc_fname) end md = Interscript::DSL::Metadata.new(yaml: true, map_name: map_name, library: library) do diff --git a/lib/interscript/dsl/aliases.rb b/lib/interscript/dsl/aliases.rb index 62e6af65..da9c831f 100644 --- a/lib/interscript/dsl/aliases.rb +++ b/lib/interscript/dsl/aliases.rb @@ -14,7 +14,7 @@ def def_alias(name, value) end unless Symbol === name - raise TypeError, "Alias name must be a Symbol, given #{name.class}" + raise Interscript::SystemConversionError, "Alias name must be a Symbol, given #{name.class}" end puts "def_alias(#{name.inspect}, #{thing.inspect})" if $DEBUG diff --git a/lib/interscript/dsl/group.rb b/lib/interscript/dsl/group.rb index 4a71e1d0..1283a16f 100644 --- a/lib/interscript/dsl/group.rb +++ b/lib/interscript/dsl/group.rb @@ -10,7 +10,7 @@ def initialize(&block) def run(stage, **kwargs) if stage.class != Interscript::Node::Item::Stage - raise TypeError, "I::Node::Item::Stage expected, got #{stage.class}" + raise Interscript::MapLogicError, "I::Node::Item::Stage expected, got #{stage.class}" end @node.children << Interscript::Node::Rule::Run.new(stage, **kwargs) end diff --git a/lib/interscript/dsl/items.rb b/lib/interscript/dsl/items.rb index ef286128..6b80d64b 100644 --- a/lib/interscript/dsl/items.rb +++ b/lib/interscript/dsl/items.rb @@ -55,7 +55,7 @@ module Maps class << self # Select a remote map def [] map - Symbol === map or raise TypeError, "A map name must be a Symbol, not #{alias_name.class}" + Symbol === map or raise Interscript::MapLogicError, "A map name must be a Symbol, not #{alias_name.class}" Map.new(map) end alias method_missing [] @@ -68,7 +68,7 @@ def initialize name; @name = name; end # Implementation of `map.x.aliasname` def [] alias_name - Symbol === alias_name or raise TypeError, "An alias name must be a Symbol, not #{alias_name.class}" + Symbol === alias_name or raise Interscript::MapLogicError, "An alias name must be a Symbol, not #{alias_name.class}" Interscript::Node::Item::Alias.new(alias_name, map: @name) end alias method_missing [] diff --git a/lib/interscript/dsl/metadata.rb b/lib/interscript/dsl/metadata.rb index 7feb4b59..85c19497 100644 --- a/lib/interscript/dsl/metadata.rb +++ b/lib/interscript/dsl/metadata.rb @@ -4,7 +4,7 @@ class Interscript::DSL::Metadata attr_accessor :node def initialize(yaml: false, map_name: "", library: true, &block) - raise ArgumentError, "Can't evaluate metadata from Ruby context" unless yaml + raise Interscript::MapLogicError, "Can't evaluate metadata from Ruby context" unless yaml @map_name = map_name @node = Interscript::Node::MetaData.new self.instance_exec(&block) @@ -20,13 +20,12 @@ def initialize(yaml: false, map_name: "", library: true, &block) STANDARD_STRING_KEYS = %i{authority_id id language source_script destination_script - name url creation_date adoption_date description - character source confirmation_date} + name creation_date adoption_date description + character source confirmation_date original_description} - STANDARD_ARRAY_KEYS = %i{notes} + STANDARD_ARRAY_KEYS = %i{notes implementation_notes original_notes url} - NONSTANDARD_KEYS = %i{special_rules original_description original_notes - implementation_notes} + NONSTANDARD_KEYS = %i{special_rules} NECESSARY_KEYS = %i{name language source_script destination_script} diff --git a/lib/interscript/interpreter.rb b/lib/interscript/interpreter.rb index 5def726c..7db1dfb2 100644 --- a/lib/interscript/interpreter.rb +++ b/lib/interscript/interpreter.rb @@ -92,11 +92,11 @@ def execute_rule r # Try to build a tree subs_array = [] r.children.each do |i| - raise ArgumentError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i - raise ArgumentError, "Can't parallelize rules with :before" if i.before - raise ArgumentError, "Can't parallelize rules with :after" if i.after - raise ArgumentError, "Can't parallelize rules with :not_before" if i.not_before - raise ArgumentError, "Can't parallelize rules with :not_after" if i.not_after + raise Interscript::SystemConversionError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i + raise Interscript::SystemConversionError, "Can't parallelize rules with :before" if i.before + raise Interscript::SystemConversionError, "Can't parallelize rules with :after" if i.after + raise Interscript::SystemConversionError, "Can't parallelize rules with :not_before" if i.not_before + raise Interscript::SystemConversionError, "Can't parallelize rules with :not_after" if i.not_after next if i.reverse_run == true subs_array << [build_item(i.from, :par), build_item(i.to, :parstr)] end @@ -109,7 +109,7 @@ def execute_rule r # Otherwise let's build a megaregexp subs_array = [] Interscript::Stdlib.deterministic_sort_by_max_length(r.children).each do |i| # rule.from.max_length gives somewhat better test results, why is that - raise ArgumentError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i + raise Interscript::SystemConversionError, "Can't parallelize #{i.class}" unless Interscript::Node::Rule::Sub === i next if i.reverse_run == true subs_array << [build_regexp(i), build_item(i.to, :parstr)] end @@ -178,16 +178,16 @@ def build_item i, target=nil, doc=@map if i.map d = doc.dep_aliases[i.map].document a = d.imported_aliases[i.name] - raise ArgumentError, "Alias #{i.name} of #{i.stage.map} not found" unless a + raise Interscript::SystemConversionError, "Alias #{i.name} of #{i.stage.map} not found" unless a build_item(a.data, target, d) elsif Interscript::Stdlib::ALIASES.include?(i.name) if target != :re && Interscript::Stdlib.re_only_alias?(i.name) - raise ArgumentError, "Can't use #{i.name} in a #{target} context" + raise Interscript::SystemConversionError, "Can't use #{i.name} in a #{target} context" end Interscript::Stdlib::ALIASES[i.name] else a = doc.imported_aliases[i.name] - raise ArgumentError, "Alias #{i.name} not found" unless a + raise Interscript::SystemConversionError, "Alias #{i.name} not found" unless a build_item(a.data, target, doc) end when Interscript::Node::Item::String @@ -208,7 +208,7 @@ def build_item i, target=nil, doc=@map end when Interscript::Node::Item::CaptureGroup if target == :par - raise ArgumentError, "Can't use a CaptureGroup in a #{target} context" + raise Interscript::SystemConversionError, "Can't use a CaptureGroup in a #{target} context" end "(" + build_item(i.data, target, doc) + ")" when Interscript::Node::Item::Maybe, @@ -220,7 +220,7 @@ def build_item i, target=nil, doc=@map Interscript::Node::Item::MaybeSome => "*" }[i.class] if target == :par - raise ArgumentError, "Can't use a MaybeSome in a #{target} context" + raise Interscript::SystemConversionError, "Can't use a MaybeSome in a #{target} context" end if Interscript::Node::Item::String === i.data && i.data.data.length != 1 "(?:" + build_item(i.data, target, doc) + ")" + resuffix @@ -229,13 +229,13 @@ def build_item i, target=nil, doc=@map end when Interscript::Node::Item::CaptureRef if target == :par - raise ArgumentError, "Can't use CaptureRef in parallel mode" + raise Interscript::SystemConversionError, "Can't use CaptureRef in parallel mode" end "\\#{i.id}" when Interscript::Node::Item::Any if target == :str # We may never reach this point - raise ArgumentError, "Can't use Any in a string context" + raise Interscript::SystemConversionError, "Can't use Any in a string context" elsif target == :par i.data.map(&:data) elsif target == :re diff --git a/lib/interscript/node/item.rb b/lib/interscript/node/item.rb index dad62d0e..fe2e81de 100644 --- a/lib/interscript/node/item.rb +++ b/lib/interscript/node/item.rb @@ -42,7 +42,7 @@ def ==(other) def self.try_convert(i) i = Interscript::Node::Item::String.new(i) if i.class == ::String - raise TypeError, "Wrong type #{i.class}, expected I::Node::Item" unless Interscript::Node::Item === i + raise Interscript::MapLogicError, "Wrong type #{i.class}, expected I::Node::Item" unless Interscript::Node::Item === i i end end diff --git a/lib/interscript/node/item/any.rb b/lib/interscript/node/item/any.rb index f336b196..6e5a786f 100644 --- a/lib/interscript/node/item/any.rb +++ b/lib/interscript/node/item/any.rb @@ -10,7 +10,7 @@ def initialize data self.value = Interscript::Stdlib::ALIASES[data.name] else puts data.inspect - raise TypeError, "Wrong type #{data[0].class}, excepted Array, String or Range" + raise Interscript::MapLogicError, "Wrong type #{data[0].class}, excepted Array, String or Range" end end diff --git a/lib/interscript/node/item/group.rb b/lib/interscript/node/item/group.rb index 8203d637..b503efa1 100644 --- a/lib/interscript/node/item/group.rb +++ b/lib/interscript/node/item/group.rb @@ -48,7 +48,7 @@ def verify! end if wrong - raise TypeError, "An I::Node::Item::Group can't contain an #{wrong.class} item." + raise Interscript::MapLogicError, "An I::Node::Item::Group can't contain an #{wrong.class} item." end end diff --git a/lib/interscript/node/rule/sub.rb b/lib/interscript/node/rule/sub.rb index 6fe88b3b..7086e456 100644 --- a/lib/interscript/node/rule/sub.rb +++ b/lib/interscript/node/rule/sub.rb @@ -184,7 +184,7 @@ def reverse_transfer from, to node else state[:right][node.id] = true - state[:left][node.id - 1] or raise "Capture count doesn't match" + state[:left][node.id - 1] or raise Interscript::MapLogicError, "Capture count doesn't match" end when Interscript::Node::Item::CaptureGroup state[:left] << node diff --git a/lib/interscript/stdlib.rb b/lib/interscript/stdlib.rb index 36ccdf31..2f9100d7 100644 --- a/lib/interscript/stdlib.rb +++ b/lib/interscript/stdlib.rb @@ -226,7 +226,7 @@ def self.unseparate(output, separator: " ") def self.secryst(output, model:) require "secryst" rescue nil # Try to load secryst, but don't fail hard if not possible. unless defined? Secryst - raise StandardError, "Secryst is not loaded. Please read docs/Usage_with_Secryst.adoc" + raise Interscript::ExternalUtilError, "Secryst is not loaded. Please read docs/Usage_with_Secryst.adoc" end Interscript.secryst_index_locations.each do |remote| Secryst::Provisioning.add_remote(remote) @@ -240,7 +240,7 @@ def self.secryst(output, model:) def self.rababa(output, config:) require "rababa" rescue nil # Try to load rababa, but don't fail hard if not possible. unless defined? Rababa - raise StandardError, "Rababa is not loaded. Please read docs/Usage_with_Rababa.adoc" + raise Interscript::ExternalUtilError, "Rababa is not loaded. Please read docs/Usage_with_Rababa.adoc" end config_value = Interscript.rababa_configs[config] diff --git a/lib/interscript/version.rb b/lib/interscript/version.rb index 8f3fcab4..72eeefb3 100644 --- a/lib/interscript/version.rb +++ b/lib/interscript/version.rb @@ -1,3 +1,3 @@ module Interscript - VERSION = "2.3.2" + VERSION = "2.4.5" end diff --git a/lib/interscript/visualize/map.html.erb b/lib/interscript/visualize/map.html.erb index fcb8ac29..222ee8ad 100644 --- a/lib/interscript/visualize/map.html.erb +++ b/lib/interscript/visualize/map.html.erb @@ -35,7 +35,7 @@ <% case k when :url %> -
<%= h v %> +
<% v.each do |i| %><%= h i %><% if i != v.last %>; <% end %><% end %> <% when :notes, :implementation_notes, :special_rules, :original_notes, :original_description # We ignore notes for now %> <% else %>
<%= h v %> @@ -43,4 +43,4 @@ <% end %> -<%= render_stage(self.map.name, :main) %> \ No newline at end of file +<%= render_stage(self.map.name, :main) %> diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index eabec063..101688ff 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -9,6 +9,7 @@ require "interscript" require "interscript/compiler/ruby" require "interscript/compiler/javascript" unless ENV["SKIP_JS"] +require "interscript/compiler/python" unless ENV["SKIP_PYTHON"] require "interscript/utils/helpers" RSpec.configure do |config| @@ -29,6 +30,7 @@ def each_compiler &block compilers << Interscript::Interpreter compilers << Interscript::Compiler::Ruby compilers << Interscript::Compiler::Javascript unless ENV["SKIP_JS"] + compilers << Interscript::Compiler::Python unless ENV["SKIP_PYTHON"] compilers.each do |compiler| block.(compiler)