diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 89c5f57..c0908cb 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -7,7 +7,11 @@ on:
     branches: [ "master" ]
 
 env:
+  CARGO_INCREMENTAL: 0
   CARGO_TERM_COLOR: always
+  RUST_BACKTRACE: 1
+  RUSTFLAGS: -D warnings
+  RUSTDOCFLAGS: -D warnings
 
 jobs:
   build:
@@ -18,10 +22,19 @@ jobs:
       run: cargo build --verbose
     - name: Run tests
       run: cargo test --verbose
+    - name: Build docs
+      run: cargo doc
+    - name: Check formatting
+      run: cargo fmt --check
+    - name: Check clippy
+      run: cargo clippy --lib --tests
   regen:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v3
+    - uses: actions/setup-python@v5
+      with:
+        python-version: '3.12'
     - name: Regen
       run: cd scripts && python3 unicode.py
     - name: Diff
diff --git a/.gitignore b/.gitignore
index 2d7d550..12e0bd1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@ Cargo.lock
 scripts/tmp
 scripts/*.txt
 scripts/*.rs
+bench_data/*
diff --git a/Cargo.toml b/Cargo.toml
index bd8da9c..49e7539 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,20 +2,23 @@
 
 name = "unicode-width"
 version = "0.1.11"
-authors = ["kwantam <kwantam@gmail.com>", "Manish Goregaokar <manishsmail@gmail.com>"]
-
+authors = [
+    "kwantam <kwantam@gmail.com>",
+    "Manish Goregaokar <manishsmail@gmail.com>",
+]
 homepage = "https://github.com/unicode-rs/unicode-width"
 repository = "https://github.com/unicode-rs/unicode-width"
 documentation = "https://unicode-rs.github.io/unicode-width"
 license = "MIT/Apache-2.0"
 keywords = ["text", "width", "unicode"]
 readme = "README.md"
+edition = "2021"
 description = """
 Determine displayed width of `char` and `str` types
 according to Unicode Standard Annex #11 rules.
 """
 
-exclude = [ "target/*", "Cargo.lock" ]
+exclude = ["target/*", "Cargo.lock"]
 
 [dependencies]
 std = { version = "1.0", package = "rustc-std-workspace-std", optional = true }
@@ -27,7 +30,6 @@ unicode-normalization = "0.1.23"
 
 [features]
 default = []
-bench = []
 rustc-dep-of-std = ['std', 'core', 'compiler_builtins']
 
 # Legacy, now a no-op
diff --git a/benches/benches.rs b/benches/benches.rs
new file mode 100644
index 0000000..c91cef4
--- /dev/null
+++ b/benches/benches.rs
@@ -0,0 +1,113 @@
+// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+#![feature(test)]
+
+extern crate test;
+
+use std::iter;
+
+use test::Bencher;
+
+use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};
+
+#[bench]
+fn cargo(b: &mut Bencher) {
+    let string = iter::repeat('a').take(4096).collect::<String>();
+
+    b.iter(|| {
+        for c in string.chars() {
+            test::black_box(UnicodeWidthChar::width(c));
+        }
+    });
+}
+
+#[bench]
+fn stdlib(b: &mut Bencher) {
+    let string = iter::repeat('a').take(4096).collect::<String>();
+
+    b.iter(|| {
+        for c in string.chars() {
+            test::black_box(c.width());
+        }
+    });
+}
+
+#[bench]
+fn simple_if(b: &mut Bencher) {
+    let string = iter::repeat('a').take(4096).collect::<String>();
+
+    b.iter(|| {
+        for c in string.chars() {
+            test::black_box(simple_width_if(c));
+        }
+    });
+}
+
+#[bench]
+fn simple_match(b: &mut Bencher) {
+    let string = iter::repeat('a').take(4096).collect::<String>();
+
+    b.iter(|| {
+        for c in string.chars() {
+            test::black_box(simple_width_match(c));
+        }
+    });
+}
+
+#[inline]
+fn simple_width_if(c: char) -> Option<usize> {
+    let cu = c as u32;
+    if cu < 127 {
+        if cu > 31 {
+            Some(1)
+        } else if cu == 0 {
+            Some(0)
+        } else {
+            None
+        }
+    } else {
+        UnicodeWidthChar::width(c)
+    }
+}
+
+#[inline]
+fn simple_width_match(c: char) -> Option<usize> {
+    match c as u32 {
+        cu if cu == 0 => Some(0),
+        cu if cu < 0x20 => None,
+        cu if cu < 0x7f => Some(1),
+        _ => UnicodeWidthChar::width(c),
+    }
+}
+
+#[bench]
+fn enwik8(b: &mut Bencher) {
+    // To benchmark, download & unzip `enwik8` from https://data.deepai.org/enwik8.zip
+    let data_path = "bench_data/enwik8";
+    let string = std::fs::read_to_string(data_path).unwrap_or_default();
+    b.iter(|| test::black_box(UnicodeWidthStr::width(string.as_str())));
+}
+
+#[bench]
+fn jawiki(b: &mut Bencher) {
+    // To benchmark, download & extract `jawiki-20240201-pages-articles-multistream-index.txt` from
+    // https://dumps.wikimedia.org/jawiki/20240201/jawiki-20240201-pages-articles-multistream-index.txt.bz2
+    let data_path = "bench_data/jawiki-20240201-pages-articles-multistream-index.txt";
+    let string = std::fs::read_to_string(data_path).unwrap_or_default();
+    b.iter(|| test::black_box(UnicodeWidthStr::width(string.as_str())));
+}
+
+#[bench]
+fn emoji(b: &mut Bencher) {
+    // To benchmark, download emoji-style.txt from https://www.unicode.org/emoji/charts/emoji-style.txt
+    let data_path = "bench_data/emoji-style.txt";
+    let string = std::fs::read_to_string(data_path).unwrap_or_default();
+    b.iter(|| test::black_box(UnicodeWidthStr::width(string.as_str())));
+}
diff --git a/scripts/unicode.py b/scripts/unicode.py
index e91f001..b50d40f 100755
--- a/scripts/unicode.py
+++ b/scripts/unicode.py
@@ -23,6 +23,8 @@
 import os
 import re
 import sys
+from collections import defaultdict
+from itertools import batched
 
 NUM_CODEPOINTS = 0x110000
 """An upper bound for which `range(0, NUM_CODEPOINTS)` contains Unicode's codespace."""
@@ -66,12 +68,13 @@ def fetch_open(filename: str):
     """Opens `filename` and return its corresponding file object. If `filename` isn't on disk,
     fetches it from `http://www.unicode.org/Public/UNIDATA/`. Exits with code 1 on failure.
     """
-    if not os.path.exists(os.path.basename(filename)):
+    basename = os.path.basename(filename)
+    if not os.path.exists(basename):
         os.system(f"curl -O http://www.unicode.org/Public/UNIDATA/{filename}")
     try:
-        return open(filename, encoding="utf-8")
+        return open(basename, encoding="utf-8")
     except OSError:
-        sys.stderr.write(f"cannot load {filename}")
+        sys.stderr.write(f"cannot load {basename}")
         sys.exit(1)
 
 
@@ -152,7 +155,8 @@ def load_zero_widths() -> "list[bool]":
 
     - it is in general category `Cc`,
     - or if it has the `Grapheme_Extend` property (determined from `DerivedCoreProperties.txt`),
-    - or if it has the `Default_Ignorable_Code_Point` property (determined from `DerivedCoreProperties.txt`),
+    - or if it is one of U+0CC0, U+0CC7, U+0CC8, U+0CCA, U+0CCB, U+1B3B, U+1B3D, or U+1B43,
+    - or if it has the `Default_Ignorable_Code_Point` property (determined from `DerivedCoreProperties.txt`) and is not U+115F,
     - or if it has a `Hangul_Syllable_Type` of `Vowel_Jamo` or `Trailing_Jamo` (determined from `HangulSyllableType.txt`).
     """
 
@@ -408,8 +412,71 @@ def make_tables(
     return tables
 
 
+def load_variation_sequences() -> "list[int]":
+    """Outputs a list of codepoints, corresponding to all the valid characters for starting
+    an emoji presentation sequence."""
+
+    with fetch_open("emoji/emoji-variation-sequences.txt") as sequences:
+        # Match all emoji presentation sequences
+        # (one codepoint followed by U+FE0F, and labeled "emoji style")
+        sequence = re.compile(r"^([0-9A-F]+)\s+FE0F\s*;\s+emoji style")
+        codepoints = []
+        for line in sequences.readlines():
+            if match := sequence.match(line):
+                cp = int(match.group(1), 16)
+                codepoints.append(cp)
+    return codepoints
+
+
+def make_variation_sequence_table(
+    seqs: "list[int]",
+    width_map,
+) -> "tuple[list[int], list[list[int]]]":
+    """Generates 2-level lookup table for whether a codepoint might start an emoji presentation sequence.
+    (Characters that are always wide may be excluded.)
+    The first level is a match on all but the 10 LSB, the second level is a 1024-bit bitmap for those 10 LSB.
+    """
+
+    prefixes_dict = defaultdict(set)
+    for cp in seqs:
+        prefixes_dict[cp >> 10].add(cp & 0x3FF)
+
+    # We don't strictly need to keep track of characters that are always wide,
+    # because being in an emoji variation seq won't affect their width.
+    # So store their info only when it wouldn't inflate the size of the tables.
+    for k in list(prefixes_dict.keys()):
+        if all(
+            map(
+                lambda cp: width_map[(k << 10) | cp] == EffectiveWidth.WIDE,
+                prefixes_dict[k],
+            )
+        ):
+            del prefixes_dict[k]
+
+    indexes = list(prefixes_dict.keys())
+
+    # Similarly, we can spuriously return `true` for always-wide characters
+    # even if not part of a presentation seq; this saves an additional lookup,
+    # so we should do it where there is no size cost.
+    for cp, width in enumerate(width_map):
+        if width == EffectiveWidth.WIDE and (cp >> 10) in indexes:
+            prefixes_dict[cp >> 10].add(cp & 0x3FF)
+
+    leaves = []
+    for cps in prefixes_dict.values():
+        leaf = [0] * 128
+        for cp in cps:
+            idx_in_leaf, bit_shift = divmod(cp, 8)
+            leaf[idx_in_leaf] |= 1 << bit_shift
+        leaves.append(leaf)
+    return (indexes, leaves)
+
+
 def emit_module(
-    out_name: str, unicode_version: "tuple[int, int, int]", tables: "list[Table]"
+    out_name: str,
+    unicode_version: "tuple[int, int, int]",
+    tables: "list[Table]",
+    variation_table: "tuple[list[int], list[list[int]]]",
 ):
     """Outputs a Rust module to `out_name` using table data from `tables`.
     If `TABLE_CFGS` is edited, you may need to edit the included code for `lookup_width`.
@@ -486,6 +553,43 @@ def emit_module(
 """
         )
 
+        variation_idx, variation_leaves = variation_table
+
+        module.write(
+            """
+    /// Whether this character forms an [emoji presentation sequence]
+    /// (https://www.unicode.org/reports/tr51/#def_emoji_presentation_sequence)
+    /// when followed by `'\\u{FEOF}'`.
+    /// Emoji presentation sequences are considered to have width 2.
+    /// This may spuriously return `true` or `false` for characters that are always wide.
+    #[inline]
+    pub fn starts_emoji_presentation_seq(c: char) -> bool {
+        let cp: u32 = c.into();
+
+        // First level of lookup uses all but 10 LSB
+        let top_bits = cp >> 10;
+        let idx_of_leaf: usize = match top_bits {
+"""
+        )
+
+        for i, msbs in enumerate(variation_idx):
+            module.write(f"            {msbs} => {i},\n")
+
+        module.write(
+            """            _ => return false,
+        };
+
+        // Extract the 3-9th (0-indexed) least significant bits of `cp`,
+        // and use them to index into `leaf_row`.
+        let idx_within_leaf = usize::try_from((cp >> 3) & 0x7F).unwrap();
+        let leaf_byte = EMOJI_PRESENTATION_LEAVES.0[idx_of_leaf][idx_within_leaf];
+
+        // Use the 3 LSB of `cp` to index into `leaf_byte`.
+        ((leaf_byte >> (cp & 7)) & 1) == 1
+    }
+"""
+        )
+
         module.write(
             """
     /// Returns the [UAX #11](https://www.unicode.org/reports/tr11/) based width of `c`, or
@@ -534,6 +638,29 @@ def emit_module(
                 module.write(f" 0x{byte:02X},")
             module.write("\n    ];\n")
             subtable_count = new_subtable_count
+
+        # emoji table
+
+        module.write(
+            f"""
+    #[repr(align(128))]
+    struct Align128<T>(T);
+
+    /// Array of 1024-bit bitmaps. Index into the correct (obtained from `EMOJI_PRESENTATION_INDEX`)
+    /// bitmap with the 10 LSB of your codepoint to get whether it can start an emoji presentation seq.
+    static EMOJI_PRESENTATION_LEAVES: Align128<[[u8; 128]; {len(variation_leaves)}]> = Align128([
+"""
+        )
+        for leaf in variation_leaves:
+            module.write("        [\n")
+            for row in batched(leaf, 14):
+                module.write("           ")
+                for entry in row:
+                    module.write(f" 0x{entry:02X},")
+                module.write("\n")
+            module.write("        ],\n")
+
+        module.write("    ]);\n")
         module.write("}\n")
 
 
@@ -543,10 +670,13 @@ def main(module_filename: str):
     `module_filename`.
 
     We obey the following rules in decreasing order of importance:
+    - Emoji presentation sequences are double-width.
     - The soft hyphen (`U+00AD`) is single-width. (https://archive.is/fCT3c)
     - Hangul jamo medial vowels & final consonants are zero-width.
     - All `Default_Ignorable_Code_Point`s are zero-width, except for U+115F HANGUL CHOSEONG FILLER.
-    - All codepoints in general categories `Cc`, `Mn`, or `Me` are zero-width.
+    - Control characters are zero-width.
+    - `Grapheme_Extend` characters, as well as eight spacing marks that canonically decompose to `Grapheme_Extend` characters,
+      are zero-width.
     - All codepoints with an East Asian Width of `Ambigous` are ambiguous-width.
     - All codepoints with an East Asian Width of `Wide` or `Fullwidth` are double-width.
     - All other codepoints (including unassigned codepoints and codepoints with an East Asian Width
@@ -570,16 +700,25 @@ def main(module_filename: str):
 
     tables = make_tables(TABLE_CFGS, enumerate(width_map))
 
+    emoji_variations = load_variation_sequences()
+    variation_table = make_variation_sequence_table(emoji_variations, width_map)
+
     print("------------------------")
     total_size = 0
     for i, table in enumerate(tables):
         size_bytes = len(table.to_bytes())
-        print(f"Table {i} Size: {size_bytes} bytes")
+        print(f"Table {i} size: {size_bytes} bytes")
         total_size += size_bytes
+    emoji_index_size = len(variation_table[0]) * 4
+    print(f"Emoji presentation index size: {emoji_index_size} bytes")
+    total_size += emoji_index_size
+    emoji_leaves_size = len(variation_table[1]) * len(variation_table[1][0])
+    print(f"Emoji presentation leaves size: {emoji_leaves_size} bytes")
+    total_size += emoji_leaves_size
     print("------------------------")
-    print(f"  Total Size: {total_size} bytes")
+    print(f"  Total size: {total_size} bytes")
 
-    emit_module(module_filename, version, tables)
+    emit_module(module_filename, version, tables, variation_table)
     print(f'Wrote to "{module_filename}"')
 
 
diff --git a/src/lib.rs b/src/lib.rs
index 2f22613..d952880 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -9,8 +9,11 @@
 // except according to those terms.
 
 //! Determine displayed width of `char` and `str` types according to
-//! [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
-//! rules.
+//! [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/),
+//! other portions of the Unicode standard, and common implementations of
+//! POSIX [`wcwidth()`](https://pubs.opengroup.org/onlinepubs/9699919799/).
+//! See the [Rules for determining width](#rules-for-determining-width) section
+//! for the exact rules.
 //!
 //! ```rust
 //! extern crate unicode_width;
@@ -41,30 +44,55 @@
 //! [dependencies]
 //! unicode-width = "0.1.5"
 //! ```
+//! # Rules for determining width
+//!
+//! This crate currently uses the following rules to determine the width of a
+//! character or string, in order of decreasing precedence. These may be tweaked in the future.
+//!
+//! 1. [Emoji presentation sequences](https://unicode.org/reports/tr51/#def_emoji_presentation_sequence)
+//!    have width 2. (The width of a string may therefore differ from the sum of the widths of its characters.)
+//! 2. [`'\u{00AD}'` SOFT HYPHEN](https://util.unicode.org/UnicodeJsps/character.jsp?a=00AD) has width 1.
+//! 3. [`'\u{115F}'` HANGUL CHOSEONG FILLER](https://util.unicode.org/UnicodeJsps/character.jsp?a=115F) has width 2.
+//! 4. The following have width 0:
+//!    1. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BHangul_Syllable_Type%3DV%7D%5Cp%7BHangul_Syllable_Type%3DT%7D)
+//!       with a [`Hangul_Syllable_Type`](https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G45593)
+//!       of `Vowel_Jamo` (`V`) or `Trailing_Jamo` (`T`),
+//!    2. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BDefault_Ignorable_Code_Point%7D)
+//!       with the [`Default_Ignorable_Code_Point`](https://www.unicode.org/versions/Unicode15.0.0/ch05.pdf#G40095) property,
+//!    3. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BGrapheme_Extend%7D)
+//!       with the [`Grapheme_Extend`](https://www.unicode.org/versions/Unicode14.0.0/ch03.pdf#G52443) property,
+//!    4. [`'\u{0CC0}'` KANNADA VOWEL SIGN II](https://util.unicode.org/UnicodeJsps/character.jsp?a=0CC0),
+//!       [`'\u{0CC7}'` KANNADA VOWEL SIGN EE](https://util.unicode.org/UnicodeJsps/character.jsp?a=0CC7),
+//!       [`'\u{0CC8}'` KANNADA VOWEL SIGN AI](https://util.unicode.org/UnicodeJsps/character.jsp?a=0CC8),
+//!       [`'\u{0CCA}'` KANNADA VOWEL SIGN O](https://util.unicode.org/UnicodeJsps/character.jsp?a=0CCA),
+//!       [`'\u{0CCB}'` KANNADA VOWEL SIGN OO](https://util.unicode.org/UnicodeJsps/character.jsp?a=0CCB),
+//!       [`'\u{1B3B}'` BALINESE VOWEL SIGN RA REPA TEDUNG](https://util.unicode.org/UnicodeJsps/character.jsp?a=1B3B),
+//!       [`'\u{1B3D}'` BALINESE VOWEL SIGN LA LENGA TEDUNG](https://util.unicode.org/UnicodeJsps/character.jsp?a=1B3D), and
+//!       [`'\u{1B43}'` BALINESE VOWEL SIGN PEPET TEDUNG](https://util.unicode.org/UnicodeJsps/character.jsp?a=1B43),
+//!    5. [`'\0'` NUL](https://util.unicode.org/UnicodeJsps/character.jsp?a=0000).
+//! 5. The [control characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BCc%7D)
+//!    have no defined width, and are considered to have width 0 when contained within a string.
+//! 6. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DW%7D%5Cp%7BEast_Asian_Width%3DF%7D)
+//!    with an [`East_Asian_Width`](https://www.unicode.org/reports/tr11/#ED1) of [`Fullwidth` (`F`)](https://www.unicode.org/reports/tr11/#ED2)
+//!    or [`Wide` (`W`)](https://www.unicode.org/reports/tr11/#ED4) have width 2.
+//! 7. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DA%7D)
+//!    with an `East_Asian_Width` of [`Ambiguous` (`A`)](https://www.unicode.org/reports/tr11/#ED6)
+//!    have width 2 in an East Asian context, and width 1 otherwise.
+//! 8. All other characters have width 1.
 
-#![deny(missing_docs, unsafe_code)]
+#![forbid(unsafe_code)]
+#![deny(missing_docs)]
 #![doc(
     html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
     html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png"
 )]
-#![cfg_attr(feature = "bench", feature(test))]
 #![no_std]
 
-#[cfg(test)]
-#[macro_use]
-extern crate std;
-
-#[cfg(feature = "bench")]
-extern crate test;
-
 use tables::charwidth as cw;
 pub use tables::UNICODE_VERSION;
 
 mod tables;
 
-#[cfg(test)]
-mod tests;
-
 /// Methods for determining displayed width of Unicode characters.
 pub trait UnicodeWidthChar {
     /// Returns the character's displayed width in columns, or `None` if the
@@ -108,6 +136,10 @@ pub trait UnicodeWidthStr {
     /// to [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
     /// as 1 column wide. This is consistent with the recommendations for
     /// non-CJK contexts, or when the context cannot be reliably determined.
+    ///
+    /// Also consistent with UAX #11, this function treats [emoji presentation sequences](https://www.unicode.org/reports/tr51/#def_emoji_presentation_sequence)
+    /// as 2 columns wide. This means that the width of a string may not equal
+    /// the sum of the widths of its individual characters.
     fn width(&self) -> usize;
 
     /// Returns the string's displayed width in columns.
@@ -118,17 +150,38 @@ pub trait UnicodeWidthStr {
     /// to [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
     /// as 2 column wide. This is consistent with the recommendations for
     /// CJK contexts.
+    ///
+    /// Also consistent with UAX #11, this function treats [emoji presentation sequences](https://www.unicode.org/reports/tr51/#def_emoji_presentation_sequence)
+    /// as 2 columns wide. This means that the width of a string may not equal
+    /// the sum of the widths of its individual characters.
     fn width_cjk(&self) -> usize;
 }
 
 impl UnicodeWidthStr for str {
     #[inline]
     fn width(&self) -> usize {
-        self.chars().map(|c| cw::width(c, false).unwrap_or(0)).sum()
+        str_width(self, false)
     }
 
     #[inline]
     fn width_cjk(&self) -> usize {
-        self.chars().map(|c| cw::width(c, true).unwrap_or(0)).sum()
+        str_width(self, true)
     }
 }
+
+fn str_width(s: &str, is_cjk: bool) -> usize {
+    s.chars()
+        .rfold((0, false), |(sum, was_fe0f), c| {
+            if c == '\u{FE0F}' {
+                (sum, true)
+            } else {
+                let add = if was_fe0f && cw::starts_emoji_presentation_seq(c) {
+                    2
+                } else {
+                    cw::width(c, is_cjk).unwrap_or(0)
+                };
+                (sum + add, false)
+            }
+        })
+        .0
+}
diff --git a/src/tables.rs b/src/tables.rs
index 8e2e9eb..fa76684 100644
--- a/src/tables.rs
+++ b/src/tables.rs
@@ -57,6 +57,36 @@ pub mod charwidth {
         }
     }
 
+    /// Whether this character forms an [emoji presentation sequence]
+    /// (https://www.unicode.org/reports/tr51/#def_emoji_presentation_sequence)
+    /// when followed by `'\u{FEOF}'`.
+    /// Emoji presentation sequences are considered to have width 2.
+    /// This may spuriously return `true` or `false` for characters that are always wide.
+    #[inline]
+    pub fn starts_emoji_presentation_seq(c: char) -> bool {
+        let cp: u32 = c.into();
+
+        // First level of lookup uses all but 10 LSB
+        let top_bits = cp >> 10;
+        let idx_of_leaf: usize = match top_bits {
+            0 => 0,
+            8 => 1,
+            9 => 2,
+            10 => 3,
+            124 => 4,
+            125 => 5,
+            _ => return false,
+        };
+
+        // Extract the 3-9th (0-indexed) least significant bits of `cp`,
+        // and use them to index into `leaf_row`.
+        let idx_within_leaf = usize::try_from((cp >> 3) & 0x7F).unwrap();
+        let leaf_byte = EMOJI_PRESENTATION_LEAVES.0[idx_of_leaf][idx_within_leaf];
+
+        // Use the 3 LSB of `cp` to index into `leaf_byte`.
+        ((leaf_byte >> (cp & 7)) & 1) == 1
+    }
+
     /// Returns the [UAX #11](https://www.unicode.org/reports/tr11/) based width of `c`, or
     /// `None` if `c` is a control character other than `'\x00'`.
     /// If `is_cjk == true`, ambiguous width characters are treated as double width; otherwise,
@@ -538,4 +568,84 @@ pub mod charwidth {
         0x55, 0xAA, 0xAA, 0x56, 0x55, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x5F,
     ];
+
+    #[repr(align(128))]
+    struct Align128<T>(T);
+
+    /// Array of 1024-bit bitmaps. Index into the correct (obtained from `EMOJI_PRESENTATION_INDEX`)
+    /// bitmap with the 10 LSB of your codepoint to get whether it can start an emoji presentation seq.
+    static EMOJI_PRESENTATION_LEAVES: Align128<[[u8; 128]; 6]> = Align128([
+        [
+            0x00, 0x00, 0x00, 0x00, 0x08, 0x04, 0xFF, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00,
+        ],
+        [
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x02, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x03, 0x00, 0x06, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x0C, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0xFE,
+            0x0F, 0x07,
+        ],
+        [
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0C, 0x40, 0x00,
+            0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x1F, 0x40, 0x32, 0x21, 0x4D, 0xC4,
+            0x00, 0x07, 0x05, 0xFF, 0x0F, 0x80, 0x69, 0x01, 0x00, 0xC8, 0x00, 0x00, 0xFC, 0x1A,
+            0x83, 0x0C, 0x03, 0x60, 0x30, 0xC1, 0x1A, 0x00, 0x00, 0x06, 0xBF, 0x27, 0x24, 0xBF,
+            0x54, 0x20, 0x02, 0x01, 0x18, 0x00, 0x90, 0x50, 0xB8, 0x00, 0x18, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0xE0, 0x00, 0x02, 0x00, 0x01, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00,
+        ],
+        [
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE0, 0x00,
+            0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00,
+        ],
+        [
+            0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x03, 0xC0, 0x00, 0x40, 0xFE, 0x07, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0xFF, 0xFF, 0xFF, 0xFF,
+            0xFF, 0x0F, 0xFF, 0x01, 0x03, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF,
+            0xFF, 0xFF, 0xF3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+            0xFF, 0xFF, 0xCF, 0xCE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+            0xB9, 0xFF,
+        ],
+        [
+            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+            0xFF, 0xFF, 0xFF, 0xBF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0x00, 0x7E,
+            0xFF, 0xFF, 0xFF, 0x80, 0xF9, 0x07, 0x80, 0x3C, 0x61, 0x00, 0x30, 0x01, 0x06, 0x10,
+            0x1C, 0x00, 0x0E, 0x70, 0x0A, 0x81, 0x08, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+            0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF,
+            0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0xF8, 0xE7, 0xF0, 0x3F, 0x1A, 0xF9, 0x1F, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x0F,
+            0x01, 0x00,
+        ],
+    ]);
 }
diff --git a/src/tests.rs b/tests/tests.rs
similarity index 62%
rename from src/tests.rs
rename to tests/tests.rs
index 9e3805b..47218e4 100644
--- a/src/tests.rs
+++ b/tests/tests.rs
@@ -8,112 +8,10 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-#[cfg(feature = "bench")]
-use super::{UnicodeWidthChar, UnicodeWidthStr};
-#[cfg(feature = "bench")]
-use std::iter;
-#[cfg(feature = "bench")]
-use test::Bencher;
+use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};
 
-use std::prelude::v1::*;
-
-#[cfg(feature = "bench")]
-#[bench]
-fn cargo(b: &mut Bencher) {
-    let string = iter::repeat('a').take(4096).collect::<String>();
-
-    b.iter(|| {
-        for c in string.chars() {
-            test::black_box(UnicodeWidthChar::width(c));
-        }
-    });
-}
-
-#[cfg(feature = "bench")]
-#[bench]
-#[allow(deprecated)]
-fn stdlib(b: &mut Bencher) {
-    let string = iter::repeat('a').take(4096).collect::<String>();
-
-    b.iter(|| {
-        for c in string.chars() {
-            test::black_box(c.width());
-        }
-    });
-}
-
-#[cfg(feature = "bench")]
-#[bench]
-fn simple_if(b: &mut Bencher) {
-    let string = iter::repeat('a').take(4096).collect::<String>();
-
-    b.iter(|| {
-        for c in string.chars() {
-            test::black_box(simple_width_if(c));
-        }
-    });
-}
-
-#[cfg(feature = "bench")]
-#[bench]
-fn simple_match(b: &mut Bencher) {
-    let string = iter::repeat('a').take(4096).collect::<String>();
-
-    b.iter(|| {
-        for c in string.chars() {
-            test::black_box(simple_width_match(c));
-        }
-    });
-}
-
-#[cfg(feature = "bench")]
-#[inline]
-fn simple_width_if(c: char) -> Option<usize> {
-    let cu = c as u32;
-    if cu < 127 {
-        if cu > 31 {
-            Some(1)
-        } else if cu == 0 {
-            Some(0)
-        } else {
-            None
-        }
-    } else {
-        UnicodeWidthChar::width(c)
-    }
-}
-
-#[cfg(feature = "bench")]
-#[inline]
-fn simple_width_match(c: char) -> Option<usize> {
-    match c as u32 {
-        cu if cu == 0 => Some(0),
-        cu if cu < 0x20 => None,
-        cu if cu < 0x7f => Some(1),
-        _ => UnicodeWidthChar::width(c),
-    }
-}
-#[cfg(feature = "bench")]
-#[bench]
-fn enwik8(b: &mut Bencher) {
-    // To benchmark, download & unzip `enwik8` from https://data.deepai.org/enwik8.zip
-    let data_path = "bench_data/enwik8";
-    let string = std::fs::read_to_string(data_path).unwrap_or_default();
-    b.iter(|| test::black_box(UnicodeWidthStr::width(string.as_str())));
-}
-#[cfg(feature = "bench")]
-#[bench]
-fn jawiki(b: &mut Bencher) {
-    // To benchmark, download & extract `jawiki-20220501-pages-articles-multistream-index.txt` from
-    // https://dumps.wikimedia.org/jawiki/20220501/jawiki-20220501-pages-articles-multistream-index.txt.bz2
-    let data_path = "bench_data/jawiki-20220501-pages-articles-multistream-index.txt";
-    let string = std::fs::read_to_string(data_path).unwrap_or_default();
-    b.iter(|| test::black_box(UnicodeWidthStr::width(string.as_str())));
-}
 #[test]
 fn test_str() {
-    use super::UnicodeWidthStr;
-
     assert_eq!(UnicodeWidthStr::width("ｈｅｌｌｏ"), 10);
     assert_eq!("ｈｅｌｌｏ".width_cjk(), 10);
     assert_eq!(UnicodeWidthStr::width("\0\0\0\x01\x01"), 0);
@@ -130,8 +28,6 @@ fn test_str() {
 #[test]
 fn test_emoji() {
     // Example from the README.
-    use super::UnicodeWidthStr;
-
     assert_eq!(UnicodeWidthStr::width("👩"), 2); // Woman
     assert_eq!(UnicodeWidthStr::width("🔬"), 2); // Microscope
     assert_eq!(UnicodeWidthStr::width("👩‍🔬"), 4); // Woman scientist
@@ -139,8 +35,6 @@ fn test_emoji() {
 
 #[test]
 fn test_char() {
-    use super::UnicodeWidthChar;
-
     assert_eq!(UnicodeWidthChar::width('ｈ'), Some(2));
     assert_eq!('ｈ'.width_cjk(), Some(2));
     assert_eq!(UnicodeWidthChar::width('\x00'), Some(0));
@@ -153,8 +47,6 @@ fn test_char() {
 
 #[test]
 fn test_char2() {
-    use super::UnicodeWidthChar;
-
     assert_eq!(UnicodeWidthChar::width('\x00'), Some(0));
     assert_eq!('\x00'.width_cjk(), Some(0));
 
@@ -182,15 +74,11 @@ fn test_char2() {
 
 #[test]
 fn unicode_12() {
-    use super::UnicodeWidthChar;
-
     assert_eq!(UnicodeWidthChar::width('\u{1F971}'), Some(2));
 }
 
 #[test]
 fn test_default_ignorable() {
-    use super::UnicodeWidthChar;
-
     assert_eq!(UnicodeWidthChar::width('\u{E0000}'), Some(0));
 
     assert_eq!(UnicodeWidthChar::width('\u{1160}'), Some(0));
@@ -200,8 +88,6 @@ fn test_default_ignorable() {
 
 #[test]
 fn test_jamo() {
-    use super::UnicodeWidthChar;
-
     assert_eq!(UnicodeWidthChar::width('\u{1100}'), Some(2));
     assert_eq!(UnicodeWidthChar::width('\u{A97C}'), Some(2));
     // Special case: U+115F HANGUL CHOSEONG FILLER
@@ -214,8 +100,6 @@ fn test_jamo() {
 
 #[test]
 fn test_prepended_concatenation_marks() {
-    use super::UnicodeWidthChar;
-
     assert_eq!(UnicodeWidthChar::width('\u{0600}'), Some(1));
     assert_eq!(UnicodeWidthChar::width('\u{070F}'), Some(1));
     assert_eq!(UnicodeWidthChar::width('\u{08E2}'), Some(1));
@@ -224,8 +108,6 @@ fn test_prepended_concatenation_marks() {
 
 #[test]
 fn test_interlinear_annotation_chars() {
-    use super::UnicodeWidthChar;
-
     assert_eq!(UnicodeWidthChar::width('\u{FFF9}'), Some(1));
     assert_eq!(UnicodeWidthChar::width('\u{FFFA}'), Some(1));
     assert_eq!(UnicodeWidthChar::width('\u{FFFB}'), Some(1));
@@ -233,8 +115,6 @@ fn test_interlinear_annotation_chars() {
 
 #[test]
 fn test_hieroglyph_format_controls() {
-    use super::UnicodeWidthChar;
-
     assert_eq!(UnicodeWidthChar::width('\u{13430}'), Some(1));
     assert_eq!(UnicodeWidthChar::width('\u{13436}'), Some(1));
     assert_eq!(UnicodeWidthChar::width('\u{1343C}'), Some(1));
@@ -242,8 +122,6 @@ fn test_hieroglyph_format_controls() {
 
 #[test]
 fn test_marks() {
-    use super::UnicodeWidthChar;
-
     // Nonspacing marks have 0 width
     assert_eq!(UnicodeWidthChar::width('\u{0301}'), Some(0));
     // Enclosing marks have 0 width
@@ -256,8 +134,6 @@ fn test_marks() {
 
 #[test]
 fn test_canonical_equivalence() {
-    use super::{UnicodeWidthChar, UnicodeWidthStr};
-
     for c in '\0'..='\u{10FFFF}' {
         let mut nfd = String::new();
         unicode_normalization::char::decompose_canonical(c, |d| nfd.push(d));
@@ -272,3 +148,20 @@ fn test_canonical_equivalence() {
         //assert_eq!(c.width_cjk().unwrap_or(0), nfd.width_cjk(), "{c}, {nfd}");
     }
 }
+
+#[test]
+fn test_emoji_presentation() {
+    assert_eq!(UnicodeWidthChar::width('\u{0023}'), Some(1)); // '#' alone
+    assert_eq!(UnicodeWidthChar::width('\u{FE0F}'), Some(0)); // VS16 alone
+    assert_eq!(UnicodeWidthStr::width("\u{0023}\u{FE0F}"), 2); // '#' + VS16 is wide
+    assert_eq!(UnicodeWidthStr::width("a\u{0023}\u{FE0F}a"), 4); // 1 + 2 + 1
+    assert_eq!(UnicodeWidthStr::width("\u{0023}a\u{FE0F}"), 2); // VS16 after 'a': no effect
+    assert_eq!(UnicodeWidthStr::width("a\u{FE0F}"), 1); // 'a' is not widened
+    assert_eq!(UnicodeWidthStr::width("\u{0023}\u{0023}\u{FE0F}a"), 4); // only 2nd '#' wide
+
+    assert_eq!(UnicodeWidthStr::width("\u{002A}\u{FE0F}"), 2); // '*'
+    assert_eq!(UnicodeWidthStr::width("\u{23F9}\u{FE0F}"), 2); // stop button
+    assert_eq!(UnicodeWidthStr::width("\u{24C2}\u{FE0F}"), 2); // circled M
+    assert_eq!(UnicodeWidthStr::width("\u{1F6F3}\u{FE0F}"), 2); // passenger ship
+    assert_eq!(UnicodeWidthStr::width("\u{1F700}\u{FE0F}"), 1); // stays 1 with VS16
+}