Skip to content

Commit 8ac60f3

Browse files
authored
add protein-translation (#56)
1 parent 6e9f54b commit 8ac60f3

File tree

15 files changed

+678
-5
lines changed

15 files changed

+678
-5
lines changed

config.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,14 @@
305305
"prerequisites": [],
306306
"difficulty": 4
307307
},
308+
{
309+
"slug": "protein-translation",
310+
"name": "Protein Translation",
311+
"uuid": "246139bc-0abd-4aab-80c4-f8b6c79f0366",
312+
"practices": [],
313+
"prerequisites": [],
314+
"difficulty": 4
315+
},
308316
{
309317
"slug": "roman-numerals",
310318
"name": "Roman Numerals",
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Instructions
2+
3+
Your job is to translate RNA sequences into proteins.
4+
5+
RNA strands are made up of three-nucleotide sequences called **codons**.
6+
Each codon translates to an **amino acid**.
7+
When joined together, those amino acids make a protein.
8+
9+
In the real world, there are 64 codons, which in turn correspond to 20 amino acids.
10+
However, for this exercise, you’ll only use a few of the possible 64.
11+
They are listed below:
12+
13+
| Codon | Amino Acid |
14+
| ------------------ | ------------- |
15+
| AUG | Methionine |
16+
| UUU, UUC | Phenylalanine |
17+
| UUA, UUG | Leucine |
18+
| UCU, UCC, UCA, UCG | Serine |
19+
| UAU, UAC | Tyrosine |
20+
| UGU, UGC | Cysteine |
21+
| UGG | Tryptophan |
22+
| UAA, UAG, UGA | STOP |
23+
24+
For example, the RNA string “AUGUUUUCU” has three codons: “AUG”, “UUU” and “UCU”.
25+
These map to Methionine, Phenylalanine, and Serine.
26+
27+
## “STOP” Codons
28+
29+
You’ll note from the table above that there are three **“STOP” codons**.
30+
If you encounter any of these codons, ignore the rest of the sequence — the protein is complete.
31+
32+
For example, “AUGUUUUCUUAAAUG” contains a STOP codon (“UAA”).
33+
Once we reach that point, we stop processing.
34+
We therefore only consider the part before it (i.e. “AUGUUUUCU”), not any further codons after it (i.e. “AUG”).
35+
36+
Learn more about [protein translation on Wikipedia][protein-translation].
37+
38+
[protein-translation]: https://en.wikipedia.org/wiki/Translation_(biology)
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
namespace ProteinTranslation
2+
3+
inductive Protein where
4+
| Methionine : Protein
5+
| Phenylalanine : Protein
6+
| Leucine : Protein
7+
| Serine : Protein
8+
| Tyrosine : Protein
9+
| Cysteine : Protein
10+
| Tryptophan : Protein
11+
deriving BEq, Repr
12+
13+
def codonToProtein : String -> Option Protein
14+
| "AUG" => some .Methionine
15+
| "UUU" => some .Phenylalanine
16+
| "UUC" => some .Phenylalanine
17+
| "UUA" => some .Leucine
18+
| "UUG" => some .Leucine
19+
| "UCU" => some .Serine
20+
| "UCC" => some .Serine
21+
| "UCA" => some .Serine
22+
| "UCG" => some .Serine
23+
| "UAU" => some .Tyrosine
24+
| "UAC" => some .Tyrosine
25+
| "UGU" => some .Cysteine
26+
| "UGC" => some .Cysteine
27+
| "UGG" => some .Tryptophan
28+
| _ => none
29+
30+
def proteins (strand : String) : Except String (Array Protein) :=
31+
let rec helper := fun sequence acc =>
32+
match sequence with
33+
| [] => some acc
34+
| _ :: [] => none
35+
| _ :: _ :: [] => none
36+
| c1 :: c2 :: c3 :: cs =>
37+
let codon := [c1, c2, c3].asString
38+
match codon with
39+
| "UAA" => some acc
40+
| "UAG" => some acc
41+
| "UGA" => some acc
42+
| _ =>
43+
let maybeProtein := codonToProtein codon
44+
match maybeProtein with
45+
| some protein => helper cs (protein :: acc)
46+
| none => none
47+
let result := helper strand.toList []
48+
match result with
49+
| none => .error "Invalid codon"
50+
| some list => .ok (list.foldr (fun x acc => acc.push x) #[])
51+
52+
end ProteinTranslation
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"authors": [
3+
"oxe-i"
4+
],
5+
"files": {
6+
"solution": [
7+
"ProteinTranslation.lean"
8+
],
9+
"test": [
10+
"ProteinTranslationTest.lean"
11+
],
12+
"example": [
13+
".meta/Example.lean"
14+
]
15+
},
16+
"blurb": "Translate RNA sequences into proteins.",
17+
"source": "Tyler Long"
18+
}
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
# This is an auto-generated file.
2+
#
3+
# Regenerating this file via `configlet sync` will:
4+
# - Recreate every `description` key/value pair
5+
# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications
6+
# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion)
7+
# - Preserve any other key/value pair
8+
#
9+
# As user-added comments (using the # character) will be removed when this file
10+
# is regenerated, comments can be added via a `comment` key.
11+
12+
[2c44f7bf-ba20-43f7-a3bf-f2219c0c3f98]
13+
description = "Empty RNA sequence results in no proteins"
14+
15+
[96d3d44f-34a2-4db4-84cd-fff523e069be]
16+
description = "Methionine RNA sequence"
17+
18+
[1b4c56d8-d69f-44eb-be0e-7b17546143d9]
19+
description = "Phenylalanine RNA sequence 1"
20+
21+
[81b53646-bd57-4732-b2cb-6b1880e36d11]
22+
description = "Phenylalanine RNA sequence 2"
23+
24+
[42f69d4f-19d2-4d2c-a8b0-f0ae9ee1b6b4]
25+
description = "Leucine RNA sequence 1"
26+
27+
[ac5edadd-08ed-40a3-b2b9-d82bb50424c4]
28+
description = "Leucine RNA sequence 2"
29+
30+
[8bc36e22-f984-44c3-9f6b-ee5d4e73f120]
31+
description = "Serine RNA sequence 1"
32+
33+
[5c3fa5da-4268-44e5-9f4b-f016ccf90131]
34+
description = "Serine RNA sequence 2"
35+
36+
[00579891-b594-42b4-96dc-7ff8bf519606]
37+
description = "Serine RNA sequence 3"
38+
39+
[08c61c3b-fa34-4950-8c4a-133945570ef6]
40+
description = "Serine RNA sequence 4"
41+
42+
[54e1e7d8-63c0-456d-91d2-062c72f8eef5]
43+
description = "Tyrosine RNA sequence 1"
44+
45+
[47bcfba2-9d72-46ad-bbce-22f7666b7eb1]
46+
description = "Tyrosine RNA sequence 2"
47+
48+
[3a691829-fe72-43a7-8c8e-1bd083163f72]
49+
description = "Cysteine RNA sequence 1"
50+
51+
[1b6f8a26-ca2f-43b8-8262-3ee446021767]
52+
description = "Cysteine RNA sequence 2"
53+
54+
[1e91c1eb-02c0-48a0-9e35-168ad0cb5f39]
55+
description = "Tryptophan RNA sequence"
56+
57+
[e547af0b-aeab-49c7-9f13-801773a73557]
58+
description = "STOP codon RNA sequence 1"
59+
60+
[67640947-ff02-4f23-a2ef-816f8a2ba72e]
61+
description = "STOP codon RNA sequence 2"
62+
63+
[9c2ad527-ebc9-4ace-808b-2b6447cb54cb]
64+
description = "STOP codon RNA sequence 3"
65+
66+
[f4d9d8ee-00a8-47bf-a1e3-1641d4428e54]
67+
description = "Sequence of two protein codons translates into proteins"
68+
69+
[dd22eef3-b4f1-4ad6-bb0b-27093c090a9d]
70+
description = "Sequence of two different protein codons translates into proteins"
71+
72+
[d0f295df-fb70-425c-946c-ec2ec185388e]
73+
description = "Translate RNA strand into correct protein list"
74+
75+
[e30e8505-97ec-4e5f-a73e-5726a1faa1f4]
76+
description = "Translation stops if STOP codon at beginning of sequence"
77+
78+
[5358a20b-6f4c-4893-bce4-f929001710f3]
79+
description = "Translation stops if STOP codon at end of two-codon sequence"
80+
81+
[ba16703a-1a55-482f-bb07-b21eef5093a3]
82+
description = "Translation stops if STOP codon at end of three-codon sequence"
83+
84+
[4089bb5a-d5b4-4e71-b79e-b8d1f14a2911]
85+
description = "Translation stops if STOP codon in middle of three-codon sequence"
86+
87+
[2c2a2a60-401f-4a80-b977-e0715b23b93d]
88+
description = "Translation stops if STOP codon in middle of six-codon sequence"
89+
90+
[f6f92714-769f-4187-9524-e353e8a41a80]
91+
description = "Sequence of two non-STOP codons does not translate to a STOP codon"
92+
93+
[1e75ea2a-f907-4994-ae5c-118632a1cb0f]
94+
description = "Non-existing codon can't translate"
95+
include = false
96+
97+
[9eac93f3-627a-4c90-8653-6d0a0595bc6f]
98+
description = "Unknown amino acids, not part of a codon, can't translate"
99+
reimplements = "1e75ea2a-f907-4994-ae5c-118632a1cb0f"
100+
101+
[9d73899f-e68e-4291-b1e2-7bf87c00f024]
102+
description = "Incomplete RNA sequence can't translate"
103+
104+
[43945cf7-9968-402d-ab9f-b8a28750b050]
105+
description = "Incomplete RNA sequence can translate if valid until a STOP codon"
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
namespace ProteinTranslation
2+
3+
inductive Protein where
4+
| Methionine : Protein
5+
| Phenylalanine : Protein
6+
| Leucine : Protein
7+
| Serine : Protein
8+
| Tyrosine : Protein
9+
| Cysteine : Protein
10+
| Tryptophan : Protein
11+
deriving BEq, Repr
12+
13+
def proteins (strand : String) : Except String (Array Protein) :=
14+
sorry
15+
16+
end ProteinTranslation
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import LeanTest
2+
import ProteinTranslation
3+
4+
open LeanTest
5+
6+
instance {α β} [BEq α] [BEq β] : BEq (Except α β) where
7+
beq
8+
| .ok a, .ok b => a == b
9+
| .error e1, .error e2 => e1 == e2
10+
| _, _ => false
11+
12+
def proteinTranslationTests : TestSuite :=
13+
(TestSuite.empty "ProteinTranslation")
14+
|>.addTest "Empty RNA sequence results in no proteins" (do
15+
return assertEqual (.ok #[]) (ProteinTranslation.proteins ""))
16+
|>.addTest "Methionine RNA sequence" (do
17+
return assertEqual (.ok #[.Methionine]) (ProteinTranslation.proteins "AUG"))
18+
|>.addTest "Phenylalanine RNA sequence 1" (do
19+
return assertEqual (.ok #[.Phenylalanine]) (ProteinTranslation.proteins "UUU"))
20+
|>.addTest "Phenylalanine RNA sequence 2" (do
21+
return assertEqual (.ok #[.Phenylalanine]) (ProteinTranslation.proteins "UUC"))
22+
|>.addTest "Leucine RNA sequence 1" (do
23+
return assertEqual (.ok #[.Leucine]) (ProteinTranslation.proteins "UUA"))
24+
|>.addTest "Leucine RNA sequence 2" (do
25+
return assertEqual (.ok #[.Leucine]) (ProteinTranslation.proteins "UUG"))
26+
|>.addTest "Serine RNA sequence 1" (do
27+
return assertEqual (.ok #[.Serine]) (ProteinTranslation.proteins "UCU"))
28+
|>.addTest "Serine RNA sequence 2" (do
29+
return assertEqual (.ok #[.Serine]) (ProteinTranslation.proteins "UCC"))
30+
|>.addTest "Serine RNA sequence 3" (do
31+
return assertEqual (.ok #[.Serine]) (ProteinTranslation.proteins "UCA"))
32+
|>.addTest "Serine RNA sequence 4" (do
33+
return assertEqual (.ok #[.Serine]) (ProteinTranslation.proteins "UCG"))
34+
|>.addTest "Tyrosine RNA sequence 1" (do
35+
return assertEqual (.ok #[.Tyrosine]) (ProteinTranslation.proteins "UAU"))
36+
|>.addTest "Tyrosine RNA sequence 2" (do
37+
return assertEqual (.ok #[.Tyrosine]) (ProteinTranslation.proteins "UAC"))
38+
|>.addTest "Cysteine RNA sequence 1" (do
39+
return assertEqual (.ok #[.Cysteine]) (ProteinTranslation.proteins "UGU"))
40+
|>.addTest "Cysteine RNA sequence 2" (do
41+
return assertEqual (.ok #[.Cysteine]) (ProteinTranslation.proteins "UGC"))
42+
|>.addTest "Tryptophan RNA sequence" (do
43+
return assertEqual (.ok #[.Tryptophan]) (ProteinTranslation.proteins "UGG"))
44+
|>.addTest "STOP codon RNA sequence 1" (do
45+
return assertEqual (.ok #[]) (ProteinTranslation.proteins "UAA"))
46+
|>.addTest "STOP codon RNA sequence 2" (do
47+
return assertEqual (.ok #[]) (ProteinTranslation.proteins "UAG"))
48+
|>.addTest "STOP codon RNA sequence 3" (do
49+
return assertEqual (.ok #[]) (ProteinTranslation.proteins "UGA"))
50+
|>.addTest "Sequence of two protein codons translates into proteins" (do
51+
return assertEqual (.ok #[.Phenylalanine, .Phenylalanine]) (ProteinTranslation.proteins "UUUUUU"))
52+
|>.addTest "Sequence of two different protein codons translates into proteins" (do
53+
return assertEqual (.ok #[.Leucine, .Leucine]) (ProteinTranslation.proteins "UUAUUG"))
54+
|>.addTest "Translate RNA strand into correct protein list" (do
55+
return assertEqual (.ok #[.Methionine, .Phenylalanine, .Tryptophan]) (ProteinTranslation.proteins "AUGUUUUGG"))
56+
|>.addTest "Translation stops if STOP codon at beginning of sequence" (do
57+
return assertEqual (.ok #[]) (ProteinTranslation.proteins "UAGUGG"))
58+
|>.addTest "Translation stops if STOP codon at end of two-codon sequence" (do
59+
return assertEqual (.ok #[.Tryptophan]) (ProteinTranslation.proteins "UGGUAG"))
60+
|>.addTest "Translation stops if STOP codon at end of three-codon sequence" (do
61+
return assertEqual (.ok #[.Methionine, .Phenylalanine]) (ProteinTranslation.proteins "AUGUUUUAA"))
62+
|>.addTest "Translation stops if STOP codon in middle of three-codon sequence" (do
63+
return assertEqual (.ok #[.Tryptophan]) (ProteinTranslation.proteins "UGGUAGUGG"))
64+
|>.addTest "Translation stops if STOP codon in middle of six-codon sequence" (do
65+
return assertEqual (.ok #[.Tryptophan, .Cysteine, .Tyrosine]) (ProteinTranslation.proteins "UGGUGUUAUUAAUGGUUU"))
66+
|>.addTest "Sequence of two non-STOP codons does not translate to a STOP codon" (do
67+
return assertEqual (.ok #[.Methionine, .Methionine]) (ProteinTranslation.proteins "AUGAUG"))
68+
|>.addTest "Unknown amino acids, not part of a codon, can't translate" (do
69+
return assertEqual (.error "Invalid codon") (ProteinTranslation.proteins "XYZ"))
70+
|>.addTest "Incomplete RNA sequence can't translate" (do
71+
return assertEqual (.error "Invalid codon") (ProteinTranslation.proteins "AUGU"))
72+
|>.addTest "Incomplete RNA sequence can translate if valid until a STOP codon" (do
73+
return assertEqual (.ok #[.Phenylalanine, .Phenylalanine]) (ProteinTranslation.proteins "UUCUUCUAAUGGU"))
74+
75+
def main : IO UInt32 := do
76+
runTestSuitesWithExitCode [proteinTranslationTests]
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
name = "protein-translation"
2+
version = "0.1.0"
3+
defaultTargets = ["ProteinTranslationTest"]
4+
testDriver = "ProteinTranslationTest"
5+
6+
[[lean_lib]]
7+
name = "LeanTest"
8+
srcDir = "vendor/LeanTest"
9+
10+
[[lean_lib]]
11+
name = "ProteinTranslation"
12+
13+
[[lean_exe]]
14+
name = "ProteinTranslationTest"
15+
root = "ProteinTranslationTest"
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
leanprover/lean4:v4.25.2
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
-- This module serves as the root of the `LeanTest` library.
2+
-- Import modules here that should be built as part of the library.
3+
import LeanTest.Assertions
4+
import LeanTest.Test

0 commit comments

Comments
 (0)