#!/usr/bin/perl -w ##################################################### # # peptidemap.pl # # Copyright (c) 2004 cailog.coronin.us # # Copyright (c) 2004 - Liang CAI - coronin@unc.edu # # Licensed under the GNU GPL. # # Use FASTA format protein sequence as input file, # with "-" after the predicted phosphorylation sites. # ##################################################### # 1 - clean files after the png file is generated; 0 - leave files for analysis $clean = 0; # phosp - calculate only phosphorylated peptides, please choose 1. Otherwise, please choose 0. $phosp = 1; # i - electrophoresis buffer @in = ( "pH 8.9 buffer", # 0 "pH 1.9 buffer", # 1 "pH 3.5 buffer", # 2 "pH 4.7 buffer", # 3 "pH 6.5 buffer"); # 4 $i = 0; # j - chromatography buffer @jn = ( "0", "Regular Buffer", # 1 "Phosphochromatography Buffer", # 2 "Isobutyric Buffer"); # 3 $j = 1; # symbol definitions @aasymboln = ( "c=\'k\', marker=\'o\'", # mix sites o circle symbols 0 "c=\'g\', marker=\'h\'", # pTyr v triangle down symbols 1 "c=\'b\', marker=\'p\'", # pThr D diamond symbols 2 "c=\'r\', marker=\'s\'", # pSer s square symbols 3 "c=\'y\', marker=\'^\'"); # none x cross symbols 4 @aatextn = ( "+", # mix sites "Y", # pTyr "T", # pThr "S", # pSer ""); # none print "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nGenerated by peptidemap.pl\nCopyright (c) 2004 cailog.coronin.us\nCopyright (c) 2004 - Liang CAI - coronin\@unc.edu\n\n"; print "Input a cut sequence, please put \"-\" at the start of the file name.\n\n"; print "Enter the name of the sequence file: "; $fname = ; chop($fname); unless(open(FH,$fname)) { print STDERR "\nCannot open file \"$fname\"\n"; exit; } local $/; my $input = ; close FH; my $filetype = $fname =~ s/^-//; unless ($filetype) { $input = tryptic($input); } my @sequencefile = split(/:/,$input); my $aasite = 1; $peptidecounter = 0; foreach my $sequence (@sequencefile) { my $checkphosp = $sequence =~ /-/g; if ($checkphosp >= $phosp) { ($result[$peptidecounter][0], $result[$peptidecounter][1], $result[$peptidecounter][5]) = peptidemapcore($sequence); # 0 - Y value 1 - X value 5 - AA symbol $result[$peptidecounter][2] = $sequence; # 2 - the peptide of the spot $result[$peptidecounter][3] = $aasite; # 3 - the sequence position of the peptide $peptidecounter++; print "."; } $sequence =~ s/-//g; $aasite += length($sequence); } open(SAVE,">ppm_$fname.py"); print SAVE "############################\n# Generated by peptidemap.pl\n# Copyright (c) 2004 cailog.coronin.us\n# Copyright (c) 2004 - Liang CAI - coronin\@unc.edu\n############################\n"; print SAVE "from matplotlib.matlab import *\n"; print SAVE "xlabel('Electrophoresis in $in[$i]')\n"; print SAVE "ylabel('Chromatorgraphy in $jn[$j]')\n"; print SAVE "title('Theoretical Peptide Map')\n"; my $printnumber = $peptidecounter; while ($peptidecounter) { my $ppmsize = 50 * (length($result[$printnumber-$peptidecounter][2]) ** 0.5); print SAVE "scatter([$result[$printnumber-$peptidecounter][1]], [$result[$printnumber-$peptidecounter][0]], s=$ppmsize, alpha=0.75, $aasymboln[$result[$printnumber-$peptidecounter][5]])\n"; print SAVE "text($result[$printnumber-$peptidecounter][1], $result[$printnumber-$peptidecounter][0], ' $result[$printnumber-$peptidecounter][3]$aatextn[$result[$printnumber-$peptidecounter][5]]', fontsize='xx-small')\n"; $peptidecounter--; } print SAVE "grid(True)\n"; print SAVE "savefig('ppm_$fname.png', dpi=200)\n"; close SAVE; open(SAVE,">ppm_$fname.txt"); print SAVE "############################\n# Generated by peptidemap.pl\n# Copyright (c) 2004 cailog.coronin.us\n# Copyright (c) 2004 - Liang CAI - coronin\@unc.edu\n############################\n"; print SAVE "Electrophoresis in $in[$i].\nChromatorgraphy in $jn[$j].\n----\n"; print SAVE "Site\tSequence\n"; my $peptidecounter = $printnumber; while ($peptidecounter) { print SAVE "$result[$printnumber-$peptidecounter][3]\t$result[$printnumber-$peptidecounter][2]\n"; $peptidecounter--; } close SAVE; print "\n\n$printnumber peptides mapping information print successfully.\n"; system("python ppm_$fname.py"); system("del ppm_$fname.py"); if ($clean) { system("del ppm_$fname.txt"); system("del -ppm_$fname.txt"); print "\n\nFiles are cleaned."; } exit; # core code to calculate Mf and Rf of a given peptide sub peptidemapcore { $peptide = $_[0]; # data from Hunter's 1991 review by William J Boyle, Peter Van Der Geer and Tony Hunter. # 0 expected charges for pH 8.9 electrophoresis buffer # 1 expected charges for pH 1.9 electrophoresis buffer # 2 expected charges for pH 3.5 electrophoresis buffer # 3 expected charges for pH 4.7 electrophoresis buffer # 4 expected charges for pH 6.5 electrophoresis buffer my @charge = ( [0.5, -1, 1, -1, -1, 0, -1, 1, -2, -2, -2], [1, 0, 1, 0, -1, 1, 0, 1, -1, -1, -1], [1, -0.5, 1, 0, -1, 1, 0, 1, -1, -1, -1], [1, -1, 1, -0.7, -1, 1, -0.5, 1, -1, -1, -1], [1, -1, 1, -1, -1, 0.5, -1, 1, -1.3, -1.3, -1.3], ); # 0 Molecular Weight of amino acide # 1 Rf of amino acide in Regular Buffer # 2 Rf of amino acide in Phosphochromatography Buffer # 3 Rf of amino acide in Isobutyric Buffer my @mobility = ( [89, 174, 132, 133, 121, 147, 146, 75, 155, 131, 131, 146, 149, 165, 115, 105, 185, 119, 199, 204, 181, 261, 117], [0.35, 0.2, 0.15, 0.14, 0.08, 0.22, 0.2, 0.23, 0.19, 0.76, 0.81, 0.14, 0.26, 0.77, 0.4, 0.25, 0.25, 0.34, 0.34, 0.69, 0.64, 0.64, 0.56], [0.41, 0.31, 0.21, 0.22, 0.19, 0.31, 0.29, 0.3, 0.29, 0.77, 0.81, 0.26, 0.45, 0.78, 0.47, 0.33, 0.2, 0.41, 0.23, 0.69, 0.66, 0.28, 0.62], [0.61, 0.69, 0.44, 0.46, 0.23, 0.53, 0.51, 0.5, 0.67, 0.92, 0.95, 0.58, 0.59, 0.93, 0.73, 0.48, 0.35, 0.57, 0.43, 0.72, 0.72, 0.53, 0.82], ); # count number of pSer, pThr, pTyr first, then count other amino acids my @aa = (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0); $aa[21] = $peptide =~ s/Y-//g; #pTyr $aa[18] = $peptide =~ s/T-//g; #pThr $aa[16] = $peptide =~ s/S-//g; #pSer $aa[0] = $peptide =~ s/A//g; #Ala $aa[1] = $peptide =~ s/R//g; #Arg $aa[2] = $peptide =~ s/N//g; #Asn $aa[3] = $peptide =~ s/D//g; #Asp $aa[4] = $peptide =~ s/C//g; #CysSO3H $aa[5] = $peptide =~ s/E//g; #Glu $aa[6] = $peptide =~ s/Q//g; #Gln $aa[7] = $peptide =~ s/G//g; #Gly $aa[8] = $peptide =~ s/H//g; #His $aa[9] = $peptide =~ s/I//g; #Ile $aa[10] = $peptide =~ s/L//g; #Leu $aa[11] = $peptide =~ s/K//g; #Lys $aa[12] = $peptide =~ s/M//g; #Metsulfone; $aa[13] = $peptide =~ s/F//g; #Phe $aa[14] = $peptide =~ s/P//g; #Pro $aa[15] = $peptide =~ s/S//g; #Ser $aa[17] = $peptide =~ s/T//g; #Thr $aa[19] = $peptide =~ s/W//g; #Trp $aa[20] = $peptide =~ s/Y//g; #Tyr $aa[22] = $peptide =~ s/V//g; #Val # make sure the pepdite sequence is right if ($peptide) { print "Wrong amino acid sequence!\nPlease check the input file.\nOnly one-letter code and - are allowed in the file."; exit (9); } # caculate values # only Arg, Asp, CysSO3H, His, Glu, Lys, pSer, pThr, pTyr have expected charges $netcharge = $charge[$i][0] + $charge[$i][1] + $aa[1]*$charge[$i][2] + $aa[3]*$charge[$i][3] + $aa[4]*$charge[$i][4] + $aa[8]*$charge[$i][5] + $aa[5]*$charge[$i][6] + $aa[11]*$charge[$i][7] + $aa[16]*$charge[$i][8] + $aa[18]*$charge[$i][9] + $aa[21]*$charge[$i][10]; if ($netcharge < 0) { $netcharge = 0 - $netcharge; } my $counter = 0; my $rftotal = 0; my $aanumber = $mass = 0; while ($counter < 23) { $aanumber = $aanumber + $aa[$counter]; $mass = $mass + $aa[$counter] * $mobility[0][$counter]; $rftotal = $rftotal + $aa[$counter] * $mobility[$j][$counter]; $counter ++; } # deal with the H2O mass $mass = $mass - 18 * ($aanumber - 1); my $MfX = $netcharge / ($mass ** (2/3)); my $RfY = $rftotal / $aanumber; my $AAsymbol = 0; #mix sites 0 if ($aa[21] == 1 && $aa[18] == 0 && $aa[16] == 0) { $AAsymbol = 1; # pTyr 1 } if ($aa[18] == 1 && $aa[21] == 0 && $aa[16] == 0) { $AAsymbol = 2; # pThr 2 } if ($aa[16] == 1 && $aa[21] == 0 && $aa[18] == 0) { $AAsymbol = 3; # pSer 3 } if ($aa[18] == 0 && $aa[21] == 0 && $aa[16] == 0) { $AAsymbol = 4; # none 4 } return ($RfY, $MfX, $AAsymbol); } # code to add : to match the tryptic pattern sub tryptic { $peptide2cut = $_[0]; $peptide2cut = uc($peptide2cut); $peptide2cut =~ s/[0-9]//g; $peptide2cut =~ s/\s//g; # based on http://us.expasy.org/tools/peptidecutter/peptidecutter_enzymes.html#Tryps, add ":" to mark the trypsin digestion sites $peptide2cut =~ s/R/R:/g; $peptide2cut =~ s/K/K:/g; $peptide2cut =~ s/R:P/RP/g; $peptide2cut =~ s/K:P/KP/g; $peptide2cut =~ s/WKP/WK:P/g; $peptide2cut =~ s/MRP/MR:P/g; $peptide2cut =~ s/CK:D/CKD/g; $peptide2cut =~ s/DK:D/DKD/g; $peptide2cut =~ s/CK:H/CKH/g; $peptide2cut =~ s/CK:Y/CKY/g; $peptide2cut =~ s/CR:K/CRK/g; $peptide2cut =~ s/R:R:H/R:RH/g; $peptide2cut =~ s/RR:H/RRH/g; $peptide2cut =~ s/R:R:R/R:RR/g; $peptide2cut =~ s/RR:R/RRR/g; $peptide2cut =~ s/:$//g; # based on Hunter's review to deal with the problem that phosphorylation affects digestion efficiency $peptide2cut =~ s/R:([A-Z]S-)/R$1/g; $peptide2cut =~ s/R:([A-Z]T-)/R$1/g; $peptide2cut =~ s/R:K:S-/RKS-/g; $peptide2cut =~ s/R:K:T-/RKT-/g; $peptide2cut =~ s/R:R:S-/RR:S-/g; $peptide2cut =~ s/R:R:T-/RR:T-/g; open(SAVE,">-ppm_$fname.txt"); print SAVE $peptide2cut; close SAVE; return $peptide2cut; }