Skip to content

TestUnicodeInvariants: add tests for TestCodeInvariants #247

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
May 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,6 @@
import java.util.Map;
import java.util.TreeMap;

import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.util.Tabber;
import org.unicode.cldr.util.Tabber.HTMLTabber;
import org.unicode.props.BagFormatter;
import org.unicode.jsp.ICUPropertyFactory;
import org.unicode.cldr.util.props.UnicodeLabel;
import org.unicode.props.UnicodeProperty;
import org.unicode.props.UnicodeProperty.Factory;
import org.unicode.props.UnicodeProperty.PatternMatcher;
import org.unicode.props.IndexUnicodeProperties;
import org.unicode.text.utility.Settings;

import com.ibm.icu.dev.tool.UOption;
import com.ibm.icu.dev.util.UnicodeMap;
import com.ibm.icu.lang.UCharacter;
Expand All @@ -34,6 +22,18 @@
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;

import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.util.Tabber;
import org.unicode.cldr.util.Tabber.HTMLTabber;
import org.unicode.cldr.util.props.UnicodeLabel;
import org.unicode.jsp.ICUPropertyFactory;
import org.unicode.props.BagFormatter;
import org.unicode.props.IndexUnicodeProperties;
import org.unicode.props.UnicodeProperty;
import org.unicode.props.UnicodeProperty.Factory;
import org.unicode.props.UnicodeProperty.PatternMatcher;
import org.unicode.text.utility.Settings;

public class TestUnicodeInvariants {
private static final boolean DEBUG = false;

Expand All @@ -45,6 +45,7 @@ public class TestUnicodeInvariants {
private static final boolean SHOW_LOOKUP = false;
private static int showRangeLimit = 20;
static boolean doHtml = true;
public static final String DEFAULT_FILE = "UnicodeInvariantTest.txt";

private static final int
//HELP1 = 0,
Expand All @@ -64,7 +65,7 @@ public class TestUnicodeInvariants {
public static void main(String[] args) throws IOException {
UOption.parseArgs(args, options);

String file = "UnicodeInvariantTest.txt";
String file = DEFAULT_FILE;
if (options[FILE].doesOccur) {
file = options[FILE].value;
}
Expand All @@ -77,12 +78,6 @@ public static void main(String[] args) throws IOException {
System.out.println("HTML?\t" + doHtml);

testInvariants(file, doRange);
if (TestCodeInvariants.testScriptExtensions() < 0) {
System.out.println("Invariant test for Script_Extensions failed!");
}
if (TestCodeInvariants.testGcbInDecompositions(false) < 0) {
System.out.println("Invariant test for GCB in canonical decompositions failed!");
}
}

static Transliterator toHTML;
Expand Down Expand Up @@ -113,7 +108,31 @@ enum Expected {empty, not_empty, irrelevant};

private static PrintWriter out;

public static void testInvariants(String outputFile, boolean doRange) throws IOException {
/**
* Fetch a reader for our input data.
* @param inputFile if null, read DEFAULT_FILE from classpath
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why would it be null? Line 80 calls testInvariants() with DEFAULT_FILE, or with a String from a command-line argument, if there is one.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I pass null from TestTestUnicodeInvariants to mean "use the default file".

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about passing TestUnicodeInvariants.DEFAULT_FILE from the TestTest... code?

* @return BufferedReader
* @throws IOException
*/
private static BufferedReader getInputReader(String inputFile) throws IOException {
if (inputFile != null) {
return FileUtilities.openUTF8Reader(Settings.SRC_UCD_DIR, inputFile);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI This should work in the build bots. For example, https://github.com/unicode-org/unicodetools/blob/main/.github/workflows/build-jsp.yml#L52 does set -DUNICODETOOLS_REPO_DIR=$(pwd), and Settings.SRC_UCD_DIR is relative to that. We read all source data files relative to that path.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should, but it does not.

}

// null: read it from resource data
return FileUtilities.openFile(TestUnicodeInvariants.class, DEFAULT_FILE);
}

/**
*
* @param inputFile file to input, defaults to DEFAULT_FILE
* @param doRange normally true
* @return number of failures (0 is better)
* @throws IOException
*/
public static int testInvariants(String inputFile, boolean doRange) throws IOException {
parseErrorCount = 0;
testFailureCount = 0;
boolean showScript = false;
try (final PrintWriter out2 = FileUtilities.openUTF8Writer(Settings.Output.GEN_DIR, "UnicodeTestResults." + (doHtml ? "html" : "txt"))) {
final StringWriter writer = new StringWriter();
Expand All @@ -134,7 +153,7 @@ public static void testInvariants(String outputFile, boolean doRange) throws IOE
} else {
out3.write('\uFEFF'); // BOM
}
try (final BufferedReader in = FileUtilities.openUTF8Reader(Settings.SRC_UCD_DIR, outputFile)) {
try (final BufferedReader in = getInputReader(inputFile)) {
final HTMLTabber tabber = new Tabber.HTMLTabber();

errorLister = new BagFormatter()
Expand Down Expand Up @@ -167,8 +186,6 @@ public static void testInvariants(String outputFile, boolean doRange) throws IOE
// new ChainedSymbolTable(new SymbolTable[] {
// ToolUnicodePropertySource.make(UCD.lastVersion).getSymbolTable("\u00D7"),
// ToolUnicodePropertySource.make(Default.ucdVersion()).getSymbolTable("")});
parseErrorCount = 0;
testFailureCount = 0;
while (true) {
String line = in.readLine();
if (line == null) {
Expand Down Expand Up @@ -226,6 +243,7 @@ public static void testInvariants(String outputFile, boolean doRange) throws IOE
}
out = null;
}
return parseErrorCount + testFailureCount;
}

static class PropertyComparison {
Expand Down
6 changes: 3 additions & 3 deletions unicodetools/src/main/java/org/unicode/text/UCD/UCD.java
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,7 @@ public byte getBidiClass(int codePoint) {
}
BIDI_BN_SET.addAll(DefaultIg);
}

if (SHOW_LOADING) {
System.out.println("BIDI_R_SET: " + BIDI_R_SET);
System.out.println("BIDI_AL_SET: " + BIDI_AL_SET);
Expand Down Expand Up @@ -604,7 +604,7 @@ private void populateHanExceptions(UnicodeMap<String> numeric) {
if (code == 0x5793 || code == 0x4EAC) {
continue; // two exceptions!!
}

HanException except = (HanException) hanExceptions.get(code);
if (except != null) {
throw new IllegalArgumentException("Duplicate Numeric Value for U+" +
Expand Down Expand Up @@ -1949,7 +1949,7 @@ private void fillFromFile(String version) {
try {
ConvertUCD.main(new String[]{version});
} catch (final Exception e2) {
throw new ChainException("Can't build data file for {0}", new Object[]{version}, e2);
throw new ChainException("Can't build data file for " + version + " ({0})", new Object[]{version}, e2);
}
fillFromFile2(version);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
package org.unicode.text.UCD;

import static org.junit.jupiter.api.Assertions.assertEquals;

import java.util.Collections;
import java.util.EnumSet;
import java.util.Set;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestTemplate;
import org.unicode.cldr.util.Rational.FormatStyle;
import org.unicode.props.IndexUnicodeProperties;
import org.unicode.props.UcdProperty;
import org.unicode.props.UcdPropertyValues;
Expand All @@ -20,7 +25,7 @@

public class TestCodeInvariants {

private static final boolean VERBOSE = true;
private static final boolean VERBOSE = false;
private static final int TEST_PASS = 0;
private static final int TEST_FAIL = -1;

Expand All @@ -36,17 +41,13 @@ public class TestCodeInvariants {
static final UnicodeMap<Grapheme_Cluster_Break_Values> GCB =
IUP.loadEnum(UcdProperty.Grapheme_Cluster_Break, UcdPropertyValues.Grapheme_Cluster_Break_Values.class);

public static void main(String[] args) {
testScriptExtensions();
testGcbInDecompositions(true);
}

public static int testScriptExtensions() {
@Test
public void testScriptExtensions() {
int testResult = TEST_PASS;

main:
for (Age_Values age : Age_Values.values()) {
if (age == Age_Values.Unassigned
if (age == Age_Values.Unassigned
|| age.compareTo(SCX_FIRST_DEFINED) < 0) { // skip irrelevants
continue;
}
Expand Down Expand Up @@ -77,8 +78,8 @@ public static int testScriptExtensions() {
}
}

// We also have the invariants for implicit values, though not captured on the stability_policy page, that
// 1. BAD: scx={Common} and sc=Arabic.
// We also have the invariants for implicit values, though not captured on the stability_policy page, that
// 1. BAD: scx={Common} and sc=Arabic.
// If a character has a script extensions value with 1 implicit element, then it must be the script value for the character
// 2. BAD: scx={Common, Arabic}
// NO script extensions value set with more than one element can contain an implicit value
Expand Down Expand Up @@ -115,10 +116,11 @@ public static int testScriptExtensions() {
System.out.println("Script Extensions invariant works for version " + age + "\n");
}

return testResult;
assertEquals(TEST_PASS, testResult, "Invariant test for Script_Extensions failed!");
}

public static int testGcbInDecompositions(boolean showAllNfds) {
@Test
public void testGcbInDecompositions() {
int testResult = TEST_PASS;

final String gcbPropShortName = UcdProperty.Grapheme_Cluster_Break.getShortName();
Expand Down Expand Up @@ -149,20 +151,20 @@ public static int testGcbInDecompositions(boolean showAllNfds) {
}
}

if (showAllNfds || flagged) {
if (VERBOSE || flagged) {
System.out.print(Utility.hex(cp));
System.out.print(" (" + gcbPropShortName + "=" + GCB.get(cp).getShortName() + ")");
System.out.print(" ≡ " + Utility.hex(nfdOrNull) + " ( ");

for (int i = 0; i < nfdOrNull.length(); i += Character.charCount(ch)) {
ch = UTF16.charAt(nfdOrNull, i);
System.out.print(gcbPropShortName + "=" + GCB.get(ch).getShortName() + " ");
}

System.out.print(")");
System.out.print(" " + UTF16.valueOf(cp));
System.out.print(" \"" + NAME.get(cp) + "\"");

if (flagged) {
System.out.print(" ←");
++count;
Expand All @@ -175,7 +177,7 @@ public static int testGcbInDecompositions(boolean showAllNfds) {
System.out.println("Count: " + count
+ " characters have non-singleton canonical decompositions whose any non-first characters are GCB≠EX (marked with \'←\').");

return testResult;
assertEquals(TEST_PASS, testResult, "Invariant test for GCB in canonical decompositions failed!");
}

private static String showInfo(int codePoint, Script_Values value, Set<Script_Values> extensions) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package org.unicode.text.UCD;

import static org.junit.jupiter.api.Assertions.assertAll;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;

import java.io.IOException;

import org.junit.jupiter.api.Test;
import org.unicode.text.utility.Settings;

/**
 * JUnit checks for {@link TestUnicodeInvariants}: confirms the directory settings it relies on
 * are populated, then runs the invariants test and expects zero failures.
 */
public class TestTestUnicodeInvariants {

    /** Each of the listed {@code Settings} path constants must be non-null. */
    @Test
    void testSRC_UCD_DIR() {
        // assertAll runs every check and reports all failures together,
        // rather than stopping at the first null setting.
        assertAll(
                "assert that no components of Settings.SRC_UCD_DIR are null",
                () -> assertNotNull(Settings.SRC_UCD_DIR, "Settings.SRC_UCD_DIR"),
                () -> assertNotNull(Settings.SRC_DIR, "Settings.SRC_DIR"),
                () -> assertNotNull(
                        Settings.UnicodeTools.UNICODETOOLS_RSRC_DIR,
                        "Settings.UnicodeTools.UNICODETOOLS_RSRC_DIR"),
                () -> assertNotNull(
                        Settings.UnicodeTools.UNICODETOOLS_DIR,
                        "Settings.UnicodeTools.UNICODETOOLS_DIR"),
                () -> assertNotNull(
                        Settings.UnicodeTools.UNICODETOOLS_REPO_DIR,
                        "Settings.UnicodeTools.UNICODETOOLS_REPO_DIR"));
    }

    /**
     * Runs the invariants test against the default input and requires a failure count of zero.
     *
     * @throws IOException if reading the input or writing the results fails
     */
    @Test
    void testUnicodeInvariants() throws IOException {
        // A null input file selects the default invariants file bundled as a resource.
        final int failureCount = TestUnicodeInvariants.testInvariants(null, true);
        assertEquals(0, failureCount, "TestUnicodeInvariants.testInvariants() failed");
    }
}