Skip to content

Commit 977f3d4

Browse files
authored
Merge pull request #18 from rotu/mid-raccoon
Validate character encoding
2 parents 9f69e16 + b792562 commit 977f3d4

File tree

2 files changed

+87
-3
lines changed

2 files changed

+87
-3
lines changed

.github/workflows/check.yml

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
# Verifies that the `CharacterEncoding` enum in ksy_schema.json lists exactly
# the canonical encodings reported by the Kaitai Struct compiler (KSC).
# Both lists are dumped as pretty-printed JSON and compared with `git diff`.
name: Check

on:
  push:
    branches:
      - master
  pull_request: {}

# The job only reads repository contents, so keep the GITHUB_TOKEN read-only.
permissions:
  contents: read

jobs:
  check-encodings:
    name: Check that the encoding list is up-to-date with KSC
    runs-on: ubuntu-latest

    steps:
      - name: Check out ksy_schema
        uses: actions/checkout@v4
        with:
          path: ksy_schema
      - name: Get list of encodings from ksy_schema
        working-directory: ksy_schema
        # `-e` makes jq exit non-zero when the result is `null`, so a renamed
        # or removed `CharacterEncoding` definition fails here with a clear
        # error instead of surfacing later as a confusing diff.
        run: |
          jq -e '.definitions.CharacterEncoding.enum' ksy_schema.json > encodings.json

      - name: Check out compiler
        uses: actions/checkout@v4
        with:
          repository: kaitai-io/kaitai_struct_compiler
          path: compiler
      - name: Set up JDK
        uses: actions/setup-java@v4
        with:
          distribution: temurin
          java-version: '21'
      - uses: sbt/setup-sbt@v1
      - name: Get list of canonical encodings from KSC
        working-directory: compiler
        # Written to work with https://github.com/kaitai-io/kaitai_struct_compiler/blob/56582ef65ca869ca43a1691a496bf4989f938675/shared/src/main/scala/io/kaitai/struct/EncodingList.scala
        #
        # Pipes a single Scala expression into the sbt console; the expression
        # writes the canonical encoding names as minified JSON to
        # encodings.min.json. The file is then re-formatted with `jq .` so its
        # layout matches the jq output produced from ksy_schema.json.
        run: |
          echo 'java.nio.file.Files.write(java.nio.file.Paths.get("encodings.min.json"), io.kaitai.struct.JSON.stringify(io.kaitai.struct.EncodingList.canonicalToAliasEntries.map(_._1)).getBytes(java.nio.charset.StandardCharsets.UTF_8))' \
            | sbt compilerJVM/console
          jq . encodings.min.json > encodings.json
          rm -f encodings.min.json

      - name: Compare encoding lists in ksy_schema and KSC
        # `--exit-code` makes git return a non-zero status when the two files
        # differ, which fails the job and prints the diff in the log.
        run: |
          git diff --no-index --exit-code -- ksy_schema/encodings.json compiler/encodings.json

ksy_schema.json

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -292,8 +292,8 @@
292292
]
293293
},
294294
"encoding": {
295-
"type": "string",
296-
"description": "default character encoding for string fields (of type `str` or `strz`) in the current type and its subtypes\n\nshould be one of the canonical encodings from [this list](https://github.com/kaitai-io/kaitai_struct_compiler/blob/0fd43b3c2186f9f87e95efcedfc6e723d82ee274/shared/src/main/scala/io/kaitai/struct/EncodingList.scala), otherwise the compiler will issue a warning (since version 0.11)"
295+
"$ref": "#/definitions/CharacterEncoding",
296+
"description": "default character encoding for string fields (of type `str` or `strz`) in the current type and its subtypes"
297297
},
298298
"endian": {
299299
"anyOf": [
@@ -479,7 +479,7 @@
479479
"pattern": "^([a-z][a-z0-9_]*::)*[a-z][a-z0-9_]*$"
480480
},
481481
"encoding": {
482-
"type": "string"
482+
"$ref": "#/definitions/CharacterEncoding"
483483
},
484484
"pad-right": {
485485
"type": "integer",
@@ -692,6 +692,44 @@
692692
{ "type": "boolean" },
693693
{ "type": "null" }
694694
]
695+
},
696+
"CharacterEncoding": {
697+
"description": "canonical names of character encodings supported by Kaitai Struct\n\nin addition to these canonical names, the compiler (since version 0.11) also recognizes their popular aliases, but issues a warning for them",
698+
"$comment": "the `enum` list must be kept in sync with https://github.com/kaitai-io/kaitai_struct_compiler/blob/master/shared/src/main/scala/io/kaitai/struct/EncodingList.scala",
699+
"enum": [
700+
"ASCII",
701+
"UTF-8",
702+
"UTF-16LE",
703+
"UTF-16BE",
704+
"UTF-32LE",
705+
"UTF-32BE",
706+
"ISO-8859-1",
707+
"ISO-8859-2",
708+
"ISO-8859-3",
709+
"ISO-8859-4",
710+
"ISO-8859-5",
711+
"ISO-8859-6",
712+
"ISO-8859-7",
713+
"ISO-8859-8",
714+
"ISO-8859-9",
715+
"ISO-8859-10",
716+
"ISO-8859-11",
717+
"ISO-8859-13",
718+
"ISO-8859-14",
719+
"ISO-8859-15",
720+
"ISO-8859-16",
721+
"windows-1250",
722+
"windows-1251",
723+
"windows-1252",
724+
"windows-1253",
725+
"windows-1254",
726+
"windows-1255",
727+
"windows-1256",
728+
"windows-1257",
729+
"windows-1258",
730+
"IBM437",
731+
"IBM866"
732+
]
695733
}
696734
}
697735
}

0 commit comments

Comments
 (0)