<?xml version="1.0"?>
<!DOCTYPE bioml SYSTEM "bioml.dtd">
<bioml>
<note>
The following is a valid BIOML file describing the gene
and the gene product that becomes human insulin.
</note>
<organism>
<species>Homo sapiens</species>
<chromosome number="11">
<locus label="HUMINS locus">
<gene label="Insulin gene">
<dna start="1" end="4992" label="Complete HUMINS sequence">
<!-- Feature map in 1-based inclusive coordinates on the 4992-residue sequence.
     The two flanking domains, the exons and the introns tile 1 to 4992 end to
     end with no gaps and no shared residues. Chain C is listed in two parts
     because Intron 2 interrupts it: part 1 ends with Exon 2 and part 2 starts
     with Exon 3. -->
<ddomain start="1" end="2185" label="flanking domain"/>
<ddomain start="1340" end="1823" label="polymorphic domain"/>
<ddomain start="2424" end="2495" label="Signal peptide"/>
<ddomain start="2496" end="2585" label="Chain B"/>
<ddomain start="2586" end="2610" label="Chain C(1)"/>
<ddomain start="3397" end="3476" label="Chain C(2)"/>
<ddomain start="3477" end="3539" label="Chain A"/>
<exon start="2186" end="2227" label="Exon 1"/>
<intron start="2228" end="2406" label="Intron 1"/>
<exon start="2407" end="2610" label="Exon 2"/>
<intron start="2611" end="3396" label="Intron 2"/>
<exon start="3397" end="3615" label="Exon 3"/>
<!-- Starts at 3616, the first residue after Exon 3 (which ends at 3615). -->
<ddomain start="3616" end="4992" label="flanking domain"/>
<comment>
The browser will ignore any symbol that cannot be a nucleotide
residue, so the numbers can remain in place to aid the author.
</comment>
1 ctcgaggggc ctagacattg ccctccagag agagcaccca acaccctcca ggcttgaccg
61 gccagggtgt ccccttccta ccttggagag agcagcccca gggcatcctg cagggggtgc
121 tgggacacca gctggccttc aaggtctctg cctccctcca gccaccccac tacacgctgc
181 tgggatcctg gatctcagct ccctggccga caacactggc aaactcctac tcatccacga
241 aggccctcct gggcatggtg gtccttccca gcctggcagt ctgttcctca cacaccttgt
301 tagtgcccag cccctgaggt tgcagctggg ggtgtctctg aagggctgtg agcccccagg
361 aagccctggg gaagtgcctg ccttgcctcc ccccggccct gccagcgcct ggctctgccc
421 tcctacctgg gctcccccca tccagcctcc ctccctacac actcctctca aggaggcacc
481 catgtcctct ccagctgccg ggcctcagag cactgtggcg tcctggggca gccaccgcat
541 gtcctgctgt ggcatggctc agggtggaaa gggcggaagg gaggggtcct gcagatagct
601 ggtgcccact accaaacccg ctcggggcag gagagccaaa ggctgggtgt gtgcagagcg
661 gccccgagag gttccgaggc tgaggccagg gtgggacata gggatgcgag gggccggggc
721 acaggatact ccaacctgcc tgcccccatg gtctcatcct cctgcttctg ggacctcctg
781 atcctgcccc tggtgctaag aggcaggtaa ggggctgcag gcagcagggc tcggagccca
841 tgccccctca ccatgggtca ggctggacct ccaggtgcct gttctgggga gctgggaggg
901 ccggaggggt gtaccccagg ggctcagccc agatgacact atgggggtga tggtgtcatg
961 ggacctggcc aggagagggg agatgggctc ccagaagagg agtgggggct gagagggtgc
1021 ctggggggcc aggacggagc tgggccagtg cacagcttcc cacacctgcc cacccccaga
1081 gtcctgccgc cacccccaga tcacacggaa gatgaggtcc gagtggcctg ctgaggactt
1141 gctgcttgtc cccaggtccc caggtcatgc cctccttctg ccaccctggg gagctgaggg
1201 cctcagctgg ggctgctgtc ctaaggcagg gtgggaacta ggcagccagc agggagggga
1261 cccctccctc actcccactc tcccaccccc accaccttgg cccatccatg gcggcatctt
1321 gggccatccg ggactgggga caggggtcct ggggacaggg gtccggggac agggtcctgg
1381 ggacaggggt gtggggacag gggtctgggg acaggggtgt ggggacaggg gtgtggggac
1441 aggggtctgg ggacaggggt gtggggacag gggtccgggg acaggggtgt ggggacaggg
1501 gtctggggac aggggtgtgg ggacaggggt gtggggacag gggtctgggg acaggggtgt
1561 ggggacaggg gtcctgggga caggggtgtg gggacagggg tgtggggaca ggggtgtggg
1621 gacaggggtg tggggacagg ggtcctgggg ataggggtgt ggggacaggg gtgtggggac
1681 aggggtcccg gggacagggg tgtggggaca ggggtgtggg gacaggggtc ctggggacag
1741 gggtctgagg acaggggtgt gggcacaggg gtcctgggga caggggtcct ggggacaggg
1801 gtcctgggga caggggtctg gggacagcag cgcaaagagc cccgccctgc agcctccagc
1861 tctcctggtc taatgtggaa agtggcccag gtgagggctt tgctctcctg gagacatttg
1921 cccccagctg tgagcaggga caggtctggc caccgggccc ctggttaaga ctctaatgac
1981 ccgctggtcc tgaggaagag gtgctgacga ccaaggagat cttcccacag acccagcacc
2041 agggaaatgg tccggaaatt gcagcctcag cccccagcca tctgccgacc cccccacccc
2101 gccctaatgg gccaggcggc aggggttgac aggtagggga gatgggctct gagactataa
2161 agccagcggg ggcccagcag ccctcagccc tccaggacag gctgcatcag aagaggccat
2221 caagcaggtc tgttccaagg gcctttgcgt caggtgggct cagggttcca gggtggctgg
2281 accccaggcc ccagctctgc agcagggagg acgtggctgg gctcgtgaag catgtggggg
2341 tgagcccagg ggccccaagg cagggcacct ggccttcagc ctgcctcagc cctgcctgtc
2401 tcccagatca ctgtccttct gccatggccc tgtggatgcg cctcctgccc ctgctggcgc
2461 tgctggccct ctggggacct gacccagccg cagcctttgt gaaccaacac ctgtgcggct
2521 cacacctggt ggaagctctc tacctagtgt gcggggaacg aggcttcttc tacacaccca
2581 agacccgccg ggaggcagag gacctgcagg gtgagccaac cgcccattgc tgcccctggc
2641 cgcccccagc caccccctgc tcctggcgct cccacccagc atgggcagaa gggggcagga
2701 ggctgccacc cagcaggggg tcaggtgcac ttttttaaaa agaagttctc ttggtcacgt
2761 cctaaaagtg accagctccc tgtggcccag tcagaatctc agcctgagga cggtgttggc
2821 ttcggcagcc ccgagataca tcagagggtg ggcacgctcc tccctccact cgcccctcaa
2881 acaaatgccc cgcagcccat ttctccaccc tcatttgatg accgcagatt caagtgtttt
2941 gttaagtaaa gtcctgggtg acctggggtc acagggtgcc ccacgctgcc tgcctctggg
3001 cgaacacccc atcacgcccg gaggagggcg tggctgcctg cctgagtggg ccagacccct
3061 gtcgccagcc tcacggcagc tccatagtca ggagatgggg aagatgctgg ggacaggccc
3121 tggggagaag tactgggatc acctgttcag gctcccactg tgacgctgcc ccggggcggg
3181 ggaaggaggt gggacatgtg ggcgttgggg cctgtaggtc cacacccagt gtgggtgacc
3241 ctccctctaa cctgggtcca gcccggctgg agatgggtgg gagtgcgacc tagggctggc
3301 gggcaggcgg gcactgtgtc tccctgactg tgtcctcctg tgtccctctg cctcgccgct
3361 gttccggaac ctgctctgcg cggcacgtcc tggcagtggg gcaggtggag ctgggcgggg
3421 gccctggtgc aggcagcctg cagcccttgg ccctggaggg gtccctgcag aagcgtggca
3481 ttgtggaaca atgctgtacc agcatctgct ccctctacca gctggagaac tactgcaact
3541 agacgcagcc tgcaggcagc cccacacccg ccgcctcctg caccgagaga gatggaataa
3601 agcccttgaa ccagccctgc tgtgccgtct gtgtgtcttg ggggccctgg gccaagcccc
3661 acttcccggc actgttgtga gcccctccca gctctctcca cgctctctgg gtgcccacag
3721 gtgccaacgc caggcaggcc cagcatgcag tggctctccc caaagcggcc atgcctgttg
3781 gctgcctgct gcccccaccc tgtggctcag ggtccagtat gggagcttcg ggggtctctg
3841 aggggccagg gatggtgggg ccactgagaa gtgactctgt cagtagccga cctggagtcc
3901 ccagagacct tgttcaggaa agggaatgag aacattccag caattttccc cccacctagc
3961 cctcccaggt tctattttta gagttatttc tgatggagtc cctgtggagg gaggaggctg
4021 ggctgaggga gggggtcctg cagggcgggg ggctgggaag gtggggagag gctgccgaga
4081 gccacccgct atccccagct ctgggcagcc ccgggacagt cacacaccct ggcctcgcgg
4141 cccaagctgg cagccgtctg cagccacagc ttatgccagc ccaggtccag ccagacacct
4201 gagggaccca ctggtgcctt ggaggaagca ggagaggtca gatggcacca tgagctgggg
4261 caggtgcagg gaccgtggca gcacctggca gggcctcaga acccatgcct tgggcacccc
4321 ggccatgagg ccctgaggat tgcagcccaa gagaagcagg gaacgccagg gccacagggg
4381 cagagaccag gccagggtcc cttgcggccc ttagcccacc ccctcccagt aagcaggggc
4441 tgcttggcta ggcttccttt tgctacagac ctgctgctca cccagaggcc cacgggccct
4501 agtgacaagg tcgttgtggc tccaggtcct tgggggtcct gacacagagc ctcttctgca
4561 gcacccctga ggacagggtg ctccgctggg cacccagcct agtgggcaga cgagaaccta
4621 ggggctgcct gggcctactg tggcctggga ggtcagcggg tgaccctagc taccctgtgg
4681 ctgggccagt ctgcctgcca cccaggccaa accaatctgc acctttcctg agagctccac
4741 ccagggctgg gctggggatg gctgggcctg gggctggcat gggctgtggc tgcagaccac
4801 tgccagcttg ggcctcgagg ccaggagctc accctccagc tgccccgcct ccagagtggg
4861 ggccagggct gggcaggcgg gtggacggcc ggacactggc cccggaagag gagggaggcg
4921 gtggctggga tcggcagcag ccgtccatgg gaacacccag ccggccccac tcgcacgggt
4981 agagacaggc gc
</dna>
</gene>
</locus>
</chromosome>
<!-- Insulin protein record. The subunit composition refers to domains 3
     (Chain B) and 7 (Chain A) of peptide 1. All residue positions are 1-based
     indexes into the 110-residue peptide sequence given at the end of the
     peptide element. -->
<protein comp="6xS[1]">
<name>Insulin</name>
<subunit id="1" comp="1xP[1]D[3]+1xP[1]D[7]">
<peptide id="1" start="1" end="110">
<db_entry entry="INS_HUMAN" format="SWISSPROT"/>
<db_entry entry="IPHU" format="PIR"/>
<domain id="1" type="signal" start="1" end="24"/>
<domain id="2" type="helix" start="33" end="46"/>
<domain id="3" type="mature" start="25" end="54">
<name>Chain B</name>
<!-- Cysteines with a "to" attribute name a partner residue; each pairing is
     recorded again from the partner's side in Chain A (presumably the
     disulfide bridges; confirm against the BIOML DTD). -->
<aa type="C" at="31" to="96"/>
<aa type="H" at="34">
<avariant at="34" type="D"/></aa>
<aa type="C" at="43" to="109"/>
<aa type="F" at="48">
<avariant at="48" type="S"/></aa>
<aa type="F" at="49">
<avariant at="49" type="L"/></aa>
</domain>
<domain id="4" type="propeptide" start="55" end="89">
<name>Chain C</name>
<aa type="R" at="89">
<avariant at="89" type="H"/>
<avariant at="89" type="L"/></aa>
</domain>
<domain id="5" type="helix" start="91" end="95"/>
<domain id="6" type="helix" start="102" end="108"/>
<domain id="7" type="mature" start="90" end="110">
<name>Chain A</name>
<!-- The variant repeats the position of its parent residue, as every other
     avariant in this peptide does. -->
<aa type="V" at="92">
<avariant at="92" type="L"/></aa>
<aa type="C" at="95" to="100"/>
<aa type="C" at="96" to="31"/>
<aa type="C" at="100" to="95"/>
<aa type="C" at="109" to="43"/>
</domain>
MALWMRLLPL LALLALWGPD PAAAFVNQHL CGSHLVEALY LVCGERGFFY
TPKTRREAED LQVGQVELGG GPGAGSLQPL ALEGSLQKRG IVEQCCTSIC
SLYQLENYCN
</peptide>
</subunit>
</protein>
</organism>
</bioml>