unofficial mirror of bug-gnu-emacs@gnu.org 
 help / color / mirror / code / Atom feed
blob c2ee3f2118e4e3e69a848a677f29f9c257a17461 5068 bytes (raw)
name: admin/unidata/emoji-zwj.awk 	 # note: path name is non-authoritative(*)

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
 
#!/usr/bin/awk -f

## Copyright (C) 2020, 2022-2023 Free Software Foundation, Inc.

## Author: Robert Pluim <rpluim@gmail.com>

## This file is part of GNU Emacs.

## GNU Emacs is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.

## GNU Emacs is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.

### Commentary:

## This script takes as input Unicode's emoji-zwj-sequences.txt
## and produces output for Emacs's lisp/international/emoji-zwj.el.
## It also outputs the composition sequences for flags, UK flags, and
## skin tones which have been derived from emoji-sequences.txt by hand.

## For additional details, see <https://debbugs.gnu.org/39799#8>.

## Things to do after installing a new version of
## emoji-zwj-sequences.txt and emoji-sequences.txt:
## Check the output against the old output.  See if there are any new
## composition sequences in emoji-sequences.txt that need to be
## added.  Rebuild Emacs, visit emoji-zwj-sequences.txt and
## emoji-sequences.txt and check that the various sequences are being
## composed properly.  Don't forget to install an appropriate font,
## such as Noto Color Emoji.

### Code:

# Collect every RGI_Emoji_ZWJ_Sequence / RGI_Emoji_Modifier_Sequence
# line, grouped by the first codepoint of the sequence.
#   ch[first]  -- the set of first codepoints seen so far
#   vec[first] -- newline-separated, double-quoted Lisp strings, one
#                 per sequence starting with that codepoint
/^[0-9A-F].*; RGI_Emoji_(ZWJ|Modifier)_Sequence/ {
    # Keep only the space-separated codepoint list before the first ';'.
    sub(/ *;.*/, "", $0)
    count = split($0, cps)
    lead = cps[1]

    # Spell the whole sequence as one Lisp string of \N{U+XXXX} escapes.
    seq = "\""
    for (i = 1; i <= count; i++)
        seq = seq sprintf("\\N{U+%s}", cps[i])
    seq = seq "\""

    if (lead in ch)
    {
        # Further sequence for a known first codepoint: append on a new line.
        vec[lead] = vec[lead] "\n" seq
    }
    else
    {
        # First sequence for this codepoint: start its list.
        ch[lead] = lead
        vec[lead] = seq
    }
}

# The following codepoints may or may not be emoji, but they are part
# of emoji sequences.  We have code in font.c:font_range that will try
# to display them with the emoji font anyway.
#
# For each "<codepoint> FE0F ; emoji style; ..." line, record
# <codepoint> in trigger_codepoints (key and value are both the
# codepoint's hex string).
/^[0-9A-F]+ FE0F *; emoji style;/ {
    # Drop everything from the FE0F selector onwards, leaving only the
    # base codepoint.  The rule pattern allows FE0F to be followed
    # directly by ';', so the substitution has to accept that too;
    # otherwise the whole line would be stored as the "codepoint".
    sub(/ *FE0F *;.*/, "", $0)
    trigger_codepoints[$0] = $0
}

# Write the generated Emacs Lisp to standard output, using the ch, vec
# and trigger_codepoints arrays filled in by the rules above:
#   1. the file header;
#   2. a setq of auto-composition-emoji-eligible-codepoints;
#   3. one composition-function-table entry per first codepoint in ch;
#   4. the hand-derived entries for flags and UK flags;
#   5. the provide form.
# Note that awk does not specify the traversal order of "for (k in
# array)", so the order of the generated entries may differ between
# awk implementations.
END {
    print ";;; emoji-zwj.el --- emoji zwj character composition table  -*- lexical-binding:t -*-"
    print ";;; Automatically generated from admin/unidata/emoji-{zwj-,}sequences.txt"
    print "(eval-when-compile (require 'regexp-opt))"

    # List of codepoints that appeared in a "<codepoint> FE0F ; emoji
    # style" line, written as Lisp character literals.
    printf "(setq auto-composition-emoji-eligible-codepoints\n"
    printf "'("

    for (trig in trigger_codepoints)
    {
        printf("\n?\\N{U+%s}", trig)
    }
    printf "\n))\n\n"

    #  We add entries for 'codepoint U+FE0F' here to ensure that the
    # code in font_range is triggered.
    # NOTE(review): the dolist loop below iterates over ch, so an entry
    # added here for a codepoint that never started a ZWJ/modifier
    # sequence is stored in vec but not emitted -- confirm this is
    # intended.

    for (trig in trigger_codepoints)
    {
        c = sprintf("\\N{U+%s}", trig)
        vec[trig] = vec[trig] "\n\"" c "\\N{U+FE0F}\""
    }

    # One (#xFIRST . REGEXP) pair per first codepoint; the regexp is
    # built by regexp-opt from all sequences collected for it.
    print "(dolist (elt `("

    for (elt in ch)
    {
        printf("(#x%s .\n,(eval-when-compile (regexp-opt\n'(\n%s\n))))\n", elt, vec[elt])
    }
    print "))"
    print "  (set-char-table-range composition-function-table"
    print "                        (car elt)"
    print "                        (nconc (char-table-range composition-function-table (car elt))"
    print "                               (list (vector (cdr elt)"
    print "                                             0"
    print "                                             #'compose-gstring-for-graphic)))))"

    print ";; The following two blocks are derived by hand from emoji-sequences.txt"
    print ";; FIXME: add support for Emoji_Keycap_Sequence once we learn how to respect FE0F/VS-16"
    print ";; for ASCII characters."

    print ";; Flags"
    print "(set-char-table-range composition-function-table"
    print "                      '(#x1F1E6 . #x1F1FF)"
    print "                      (nconc (char-table-range composition-function-table '(#x1F1E6 . #x1F1FF))"
    print "                             (list (vector \"[\\U0001F1E6-\\U0001F1FF][\\U0001F1E6-\\U0001F1FF]\""
    print "                                           0"
    print "                                           #'compose-gstring-for-graphic))))"

    print ";; UK Flags"
    print "(set-char-table-range composition-function-table"
    print "                      #x1F3F4"
    print "                      (nconc (char-table-range composition-function-table #x1F3F4)"
    print "                             (list (vector \"\\U0001F3F4\\U000E0067\\U000E0062\\\\(?:\\U000E0065\\U000E006E\\U000E0067\\\\|\\U000E0073\\U000E0063\\U000E0074\\\\|\\U000E0077\\U000E006C\\U000E0073\\\\)\\U000E007F\""
    print "                                           0"
    print "                                           #'compose-gstring-for-graphic))))"

    # Terminate the last line so the generated file ends with a newline.
    printf "\n(provide 'emoji-zwj)\n"
}

debug log:

solving c2ee3f2118e ...
found c2ee3f2118e in https://yhetil.org/emacs-bugs/87a5xrzsph.fsf@stebalien.com/
found 7d2ff6cb900 in https://git.savannah.gnu.org/cgit/emacs.git
preparing index
index prepared:
100644 7d2ff6cb9008c26963381320d43d4b5889dda3ca	admin/unidata/emoji-zwj.awk

applying [1/1] https://yhetil.org/emacs-bugs/87a5xrzsph.fsf@stebalien.com/
diff --git a/admin/unidata/emoji-zwj.awk b/admin/unidata/emoji-zwj.awk
index 7d2ff6cb900..c2ee3f2118e 100644

Checking patch admin/unidata/emoji-zwj.awk...
Applied patch admin/unidata/emoji-zwj.awk cleanly.

index at:
100644 c2ee3f2118e4e3e69a848a677f29f9c257a17461	admin/unidata/emoji-zwj.awk

(*) Git path names are given by the tree(s) the blob belongs to.
    Blobs themselves have no identifier aside from the hash of its contents.^

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).