-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcreateLoopFile.pl
More file actions
executable file
·523 lines (481 loc) · 29.4 KB
/
createLoopFile.pl
File metadata and controls
executable file
·523 lines (481 loc) · 29.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
#!/usr/bin/perl -w
# Author: Abdullah Kahraman
# Date: 24.10.2012
###############################################################################
###############################################################################
### Creates a loop file for ROSETTA loop modelling based on PDB file and ###
### Uniprot fasta file of a protein. ###
###############################################################################
###############################################################################
use strict;
use warnings;
use Getopt::Long;
my (
# variable for parameters which are read in from commandline
$help,
$pdbFile,
$fastaFile,
$loopFile,
$newPdbFile,
);
##############################################################################
### read all needed parameters from commandline ##############################
&GetOptions(
"help!" => \$help, # print this help
"pdb=s" => \$pdbFile, # PDB file of protein
"fasta=s" => \$fastaFile, # Fasta sequence file of protein
"loop=s" => \$loopFile, # Loop output file
"new=s" => \$newPdbFile, # New PDB output file
) or die "\nTry \"$0 -h\" for a complete list of options\n\n";
##############################################################################
# help
if ($help) {printHelp(); exit}
##############################################################################
### SETTINGS #################################################################
##############################################################################
my $needle = "/cluster/home/biol/akahrama/bin/EMBOSS-6.2.0/emboss/needle";
my %aa3 = ("GLY" => "G",
"ALA" => "A",
"VAL" => "V",
"LEU" => "L",
"ILE" => "I",
"PHE" => "F",
"PRO" => "P",
"TRP" => "W",
"ASN" => "N",
"GLN" => "Q",
"MET" => "M",
"CYS" => "C",
"THR" => "T",
"TYR" => "Y",
"SER" => "S",
"ARG" => "R",
"LYS" => "K",
"HIS" => "H",
"ASP" => "D",
"GLU" => "E",
"ASX" => "B",
"GLX" => "Z");
my %aa1 = ("G" => "GLY",
"A" => "ALA",
"V" => "VAL",
"L" => "LEU",
"I" => "ILE",
"F" => "PHE",
"P" => "PRO",
"W" => "TRP",
"N" => "ASN",
"Q" => "GLN",
"M" => "MET",
"C" => "CYS",
"T" => "THR",
"Y" => "TYR",
"S" => "SER",
"R" => "ARG",
"K" => "LYS",
"H" => "HIS",
"D" => "ASP",
"E" => "GLU",
"B" => "ASX",
"Z" => "GLX");
##############################################################################
### SUBROUTINES ##############################################################
##############################################################################
###############################################################################
sub printHelp {
###############################################################################
# prints a help about the using and parameters of this scripts
# (execute if user types commandline parameter -h)
# param: no paramaters
# return: no return value
my (
$usage,
$sourceCode,
@rows,
$row,
$option,
$scriptInfo,
$example,
);
$usage = "$0 -hhr xxx.hhr -fasta xxx.fasta\n";
print "\nUsage: " . $usage . "\n";
print "Valid options are:\n\n";
open(MYSELF, "$0") or
die "Cannot read source code file $0: $!\n";
$sourceCode .= join "", <MYSELF>;
close MYSELF;
$sourceCode =~ s/^.+?\&GetOptions\(\n//s;
$sourceCode =~ s/\n\).+$//s;
@rows = split /\n/, $sourceCode;
foreach $row (@rows){
$option = $row;
$option =~ s/\s+\"//g;
$option =~ s/\"\s.+\#/\t\#/g;
$option =~ s/=./\t<value> [required]/;
$option =~ s/:./\t<value> [optional]/;
$option =~ s/!/\t<non value> [optional]/;
$row =~ s/^.*//;
print "\t";
printf("%-1s%-30s%-30s\n", "-",$option,$row);
} # end of foreach $row (@rows)
print "\n";
print "Options may be abreviated, e.g. -h for --help\n\n";
$example = "$0";
}
##############################################################################
sub getPDBaa{
(my $aaName, my $aaNo, my $chain) = @_;
my $aaBlock = "";
if($aaName eq "GLY"){
$aaBlock .= sprintf("ATOM 252 N GLY %1s %3d 9.082 -34.809 -19.487 1.00 28.90 N\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 253 CA GLY %1s %3d 8.890 -35.894 -20.455 1.00 28.69 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 254 C GLY %1s %3d 8.298 -37.183 -19.924 1.00 28.51 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 255 O GLY %1s %3d 8.367 -38.200 -20.599 1.00 28.64 O\n",$chain, $aaNo);
} elsif($aaName eq "ALA"){
$aaBlock .= sprintf("ATOM 363 N ALA %1s %3d -4.990 -25.104 -15.008 1.00 22.41 N\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 364 CA ALA %1s %3d -4.112 -23.995 -15.325 1.00 22.20 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 365 C ALA %1s %3d -4.425 -23.400 -16.690 1.00 22.38 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 366 O ALA %1s %3d -4.746 -22.207 -16.816 1.00 22.10 O\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 367 CB ALA %1s %3d -2.682 -24.463 -15.297 1.00 22.28 C\n",$chain, $aaNo);
} elsif($aaName eq "VAL"){
$aaBlock .= sprintf("ATOM 356 N VAL %1s %3d -6.489 -27.414 -15.299 1.00 22.42 N\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 357 CA VAL %1s %3d -7.074 -26.263 -14.599 1.00 22.29 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 358 C VAL %1s %3d -6.310 -24.994 -14.948 1.00 22.35 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 359 O VAL %1s %3d -6.895 -23.946 -15.150 1.00 23.00 O\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 360 CB VAL %1s %3d -7.043 -26.478 -13.042 1.00 22.43 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 361 CG1 VAL %1s %3d -7.374 -25.199 -12.302 1.00 22.25 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 362 CG2 VAL %1s %3d -7.980 -27.643 -12.613 1.00 21.06 C\n",$chain, $aaNo);
} elsif($aaName eq "LEU"){
$aaBlock .= sprintf("ATOM 706 N LEU %1s %3d -4.325 -15.704 -27.424 1.00 26.46 N\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 707 CA LEU %1s %3d -5.163 -16.890 -27.410 1.00 26.84 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 708 C LEU %1s %3d -4.539 -18.013 -28.285 1.00 27.08 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 709 O LEU %1s %3d -4.369 -19.149 -27.843 1.00 26.97 O\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 710 CB LEU %1s %3d -6.566 -16.445 -27.866 1.00 26.89 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 711 CG LEU %1s %3d -7.879 -17.232 -27.902 1.00 27.06 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 712 CD1 LEU %1s %3d -7.616 -18.654 -28.375 1.00 29.41 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 713 CD2 LEU %1s %3d -8.593 -17.215 -26.600 1.00 25.91 C\n",$chain, $aaNo);
} elsif($aaName eq "ILE"){
$aaBlock .= sprintf("ATOM 731 N ILE %1s %3d -1.383 -19.368 -26.730 1.00 25.39 N\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 732 CA ILE %1s %3d -1.697 -20.206 -25.583 1.00 25.19 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 733 C ILE %1s %3d -2.128 -21.588 -26.051 1.00 25.17 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 734 O ILE %1s %3d -1.642 -22.589 -25.534 1.00 24.61 O\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 735 CB ILE %1s %3d -2.784 -19.588 -24.662 1.00 25.17 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 736 CG1 ILE %1s %3d -2.228 -18.344 -23.952 1.00 23.44 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 737 CG2 ILE %1s %3d -3.284 -20.633 -23.639 1.00 24.44 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 738 CD1 ILE %1s %3d -3.262 -17.364 -23.568 1.00 23.67 C\n",$chain, $aaNo);
} elsif($aaName eq "PHE"){
$aaBlock .= sprintf("ATOM 920 N PHE %1s %3d -8.367 -38.325 -21.915 1.00 24.79 N\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 921 CA PHE %1s %3d -8.244 -37.001 -21.301 1.00 24.35 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 922 C PHE %1s %3d -7.503 -36.011 -22.191 1.00 23.74 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 923 O PHE %1s %3d -7.915 -34.864 -22.323 1.00 23.36 O\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 924 CB PHE %1s %3d -7.518 -37.109 -19.958 1.00 24.52 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 925 CG PHE %1s %3d -7.607 -35.866 -19.113 1.00 24.69 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 926 CD1 PHE %1s %3d -6.659 -34.847 -19.225 1.00 23.66 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 927 CD2 PHE %1s %3d -8.633 -35.724 -18.187 1.00 25.01 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 928 CE1 PHE %1s %3d -6.749 -33.709 -18.418 1.00 23.96 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 929 CE2 PHE %1s %3d -8.729 -34.593 -17.389 1.00 24.87 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 930 CZ PHE %1s %3d -7.778 -33.582 -17.506 1.00 25.11 C\n",$chain, $aaNo);
} elsif($aaName eq "TYR"){
$aaBlock .= sprintf("ATOM 931 N TYR %1s %3d -6.415 -36.473 -22.795 1.00 23.39 N\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 932 CA TYR %1s %3d -5.559 -35.627 -23.627 1.00 23.17 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 933 C TYR %1s %3d -6.147 -35.371 -25.016 1.00 23.22 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 934 O TYR %1s %3d -6.083 -34.246 -25.509 1.00 23.02 O\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 935 CB TYR %1s %3d -4.126 -36.210 -23.718 1.00 22.90 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 936 CG TYR %1s %3d -3.393 -36.215 -22.382 1.00 22.56 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 937 CD1 TYR %1s %3d -3.383 -35.080 -21.570 1.00 22.01 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 938 CD2 TYR %1s %3d -2.717 -37.354 -21.920 1.00 22.72 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 939 CE1 TYR %1s %3d -2.734 -35.070 -20.325 1.00 22.59 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 940 CE2 TYR %1s %3d -2.050 -37.345 -20.677 1.00 21.91 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 941 CZ TYR %1s %3d -2.065 -36.197 -19.890 1.00 21.67 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 942 OH TYR %1s %3d -1.438 -36.151 -18.669 1.00 21.26 O\n",$chain, $aaNo);
} elsif($aaName eq "TRP"){
$aaBlock .= sprintf("ATOM 1807 N TRP %1s %3d -31.373 -26.902 -25.772 1.00 41.48 N\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1808 CA TRP %1s %3d -31.210 -27.087 -27.204 1.00 41.43 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1809 C TRP %1s %3d -32.488 -27.492 -27.919 1.00 42.45 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1810 O TRP %1s %3d -32.656 -27.176 -29.086 1.00 42.43 O\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1811 CB TRP %1s %3d -30.103 -28.096 -27.499 1.00 40.93 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1812 CG TRP %1s %3d -28.725 -27.619 -27.195 1.00 39.87 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1813 CD1 TRP %1s %3d -28.299 -26.334 -27.107 1.00 40.15 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1814 CD2 TRP %1s %3d -27.575 -28.437 -26.965 1.00 40.42 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1815 NE1 TRP %1s %3d -26.954 -26.291 -26.826 1.00 40.09 N\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1816 CE2 TRP %1s %3d -26.486 -27.574 -26.730 1.00 40.04 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1817 CE3 TRP %1s %3d -27.355 -29.825 -26.929 1.00 40.43 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1818 CZ2 TRP %1s %3d -25.199 -28.050 -26.462 1.00 40.14 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1819 CZ3 TRP %1s %3d -26.072 -30.293 -26.666 1.00 39.97 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1820 CH2 TRP %1s %3d -25.015 -29.410 -26.436 1.00 39.96 C\n",$chain, $aaNo);
} elsif($aaName eq "HIS"){
$aaBlock .= sprintf("ATOM 256 N HIS %1s %3d 7.704 -37.135 -18.728 1.00 28.26 N\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 257 CA HIS %1s %3d 7.193 -38.321 -18.055 1.00 28.00 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 258 C HIS %1s %3d 5.706 -38.221 -17.931 1.00 27.82 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 259 O HIS %1s %3d 5.170 -37.116 -17.834 1.00 28.16 O\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 260 CB HIS %1s %3d 7.782 -38.438 -16.651 1.00 27.86 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 261 CG HIS %1s %3d 9.272 -38.528 -16.632 1.00 27.93 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 262 ND1 HIS %1s %3d 10.062 -37.661 -15.906 1.00 28.01 N\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 263 CD2 HIS %1s %3d 10.118 -39.376 -17.261 1.00 27.74 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 264 CE1 HIS %1s %3d 11.330 -37.978 -16.082 1.00 28.43 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 265 NE2 HIS %1s %3d 11.392 -39.017 -16.901 1.00 28.66 N\n",$chain, $aaNo);
} elsif($aaName eq "ARG"){
$aaBlock .= sprintf("ATOM 315 N ARG %1s %3d -1.959 -33.878 -13.817 1.00 29.93 N\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 316 CA ARG %1s %3d -2.790 -33.790 -14.990 1.00 29.73 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 317 C ARG %1s %3d -3.952 -32.818 -14.787 1.00 29.14 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 318 O ARG %1s %3d -4.298 -32.070 -15.693 1.00 29.20 O\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 319 CB ARG %1s %3d -3.324 -35.177 -15.346 1.00 29.73 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 320 CG ARG %1s %3d -4.083 -35.226 -16.641 1.00 30.51 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 321 CD ARG %1s %3d -4.706 -36.564 -16.831 1.00 32.67 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 322 NE ARG %1s %3d -5.756 -36.790 -15.837 1.00 34.38 N\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 323 CZ ARG %1s %3d -6.221 -37.986 -15.487 1.00 35.20 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 324 NH1 ARG %1s %3d -5.739 -39.091 -16.048 1.00 37.21 N\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 325 NH2 ARG %1s %3d -7.174 -38.079 -14.573 1.00 35.39 N\n",$chain, $aaNo);
} elsif($aaName eq "LYS"){
$aaBlock .= sprintf("ATOM 569 N LYS %1s %3d -10.809 6.109 -35.334 1.00 33.72 N\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 570 CA LYS %1s %3d -9.727 5.684 -34.415 1.00 33.36 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 571 C LYS %1s %3d -10.200 4.716 -33.329 1.00 33.01 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 572 O LYS %1s %3d -9.468 3.806 -32.977 1.00 33.23 O\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 573 CB LYS %1s %3d -9.042 6.876 -33.744 1.00 33.16 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 574 CG LYS %1s %3d -7.893 7.444 -34.517 1.00 32.86 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 575 CD LYS %1s %3d -7.255 8.608 -33.763 1.00 32.89 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 576 CE LYS %1s %3d -6.244 9.321 -34.629 1.00 31.72 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 577 NZ LYS %1s %3d -5.502 10.377 -33.909 1.00 30.96 N\n",$chain, $aaNo);
} elsif($aaName eq "ASP"){
$aaBlock .= sprintf("ATOM 723 N ASP %1s %3d -1.393 -18.291 -29.352 1.00 27.60 N\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 724 CA ASP %1s %3d -0.138 -18.697 -28.719 1.00 27.94 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 725 C ASP %1s %3d -0.347 -19.634 -27.528 1.00 26.65 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 726 O ASP %1s %3d 0.430 -20.569 -27.342 1.00 26.11 O\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 727 CB ASP %1s %3d 0.691 -17.475 -28.291 1.00 28.40 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 728 CG ASP %1s %3d 1.495 -16.899 -29.450 1.00 32.77 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 729 OD1 ASP %1s %3d 0.879 -16.586 -30.524 1.00 37.85 O\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 730 OD2 ASP %1s %3d 2.745 -16.788 -29.302 1.00 36.69 O\n",$chain, $aaNo);
} elsif($aaName eq "GLU"){
$aaBlock .= sprintf("ATOM 697 N GLU %1s %3d -2.586 -13.717 -28.620 1.00 25.58 N\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 698 CA GLU %1s %3d -2.348 -14.331 -27.311 1.00 25.66 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 699 C GLU %1s %3d -3.030 -15.703 -27.175 1.00 25.79 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 700 O GLU %1s %3d -2.404 -16.700 -26.854 1.00 25.35 O\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 701 CB GLU %1s %3d -2.852 -13.420 -26.206 1.00 25.13 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 702 CG GLU %1s %3d -1.888 -12.364 -25.815 1.00 24.93 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 703 CD GLU %1s %3d -2.407 -11.555 -24.651 1.00 25.66 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 704 OE1 GLU %1s %3d -3.096 -12.132 -23.786 1.00 25.02 O\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 705 OE2 GLU %1s %3d -2.135 -10.342 -24.600 1.00 25.56 O\n",$chain, $aaNo);
} elsif($aaName eq "ASN"){
$aaBlock .= sprintf("ATOM 745 N ASN %1s %3d -1.411 -23.022 -28.884 1.00 25.99 N\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 746 CA ASN %1s %3d -0.351 -23.699 -29.632 1.00 25.70 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 747 C ASN %1s %3d 0.606 -24.317 -28.654 1.00 25.14 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 748 O ASN %1s %3d 1.102 -25.408 -28.894 1.00 25.13 O\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 749 CB ASN %1s %3d 0.379 -22.748 -30.606 1.00 25.73 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 750 CG ASN %1s %3d -0.482 -22.370 -31.823 1.00 27.34 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 751 OD1 ASN %1s %3d -1.251 -23.175 -32.342 1.00 27.19 O\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 752 ND2 ASN %1s %3d -0.359 -21.125 -32.266 1.00 31.39 N\n",$chain, $aaNo);
} elsif($aaName eq "GLN"){
$aaBlock .= sprintf("ATOM 873 N GLN %1s %3d -2.110 -45.346 -20.481 1.00 31.75 N\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 874 CA GLN %1s %3d -3.390 -45.939 -20.090 1.00 31.84 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 875 C GLN %1s %3d -4.528 -45.202 -20.772 1.00 31.58 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 876 O GLN %1s %3d -4.421 -44.013 -21.040 1.00 31.00 O\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 877 CB GLN %1s %3d -3.580 -45.876 -18.587 1.00 31.38 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 878 CG GLN %1s %3d -2.691 -46.839 -17.824 1.00 32.34 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 879 CD GLN %1s %3d -2.669 -46.536 -16.342 1.00 32.54 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 880 OE1 GLN %1s %3d -2.664 -45.368 -15.944 1.00 35.20 O\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 881 NE2 GLN %1s %3d -2.659 -47.576 -15.514 1.00 31.83 N\n",$chain, $aaNo);
} elsif($aaName eq "PRO"){
$aaBlock .= sprintf("ATOM 882 N PRO %1s %3d -5.624 -45.915 -21.067 1.00 31.73 N\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 883 CA PRO %1s %3d -6.747 -45.324 -21.802 1.00 31.69 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 884 C PRO %1s %3d -7.407 -44.097 -21.132 1.00 31.54 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 885 O PRO %1s %3d -7.824 -43.180 -21.835 1.00 31.62 O\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 886 CB PRO %1s %3d -7.726 -46.496 -21.944 1.00 31.53 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 887 CG PRO %1s %3d -7.327 -47.463 -20.910 1.00 31.50 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 888 CD PRO %1s %3d -5.863 -47.334 -20.758 1.00 31.51 C\n",$chain, $aaNo);
} elsif($aaName eq "SER"){
$aaBlock .= sprintf("ATOM 898 N SER %1s %3d -6.074 -41.550 -19.460 1.00 29.61 N\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 899 CA SER %1s %3d -5.247 -40.420 -19.850 1.00 28.88 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 900 C SER %1s %3d -5.347 -40.108 -21.324 1.00 28.58 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 901 O SER %1s %3d -5.495 -38.947 -21.703 1.00 28.84 O\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 902 CB SER %1s %3d -3.788 -40.722 -19.551 1.00 28.53 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 903 OG SER %1s %3d -3.571 -40.642 -18.177 1.00 28.24 O\n",$chain, $aaNo);
} elsif($aaName eq "THR"){
$aaBlock .= sprintf("ATOM 1110 N THR %1s %3d -9.486 -26.130 -41.142 1.00 38.99 N\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1111 CA THR %1s %3d -8.470 -27.053 -40.620 1.00 38.59 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1112 C THR %1s %3d -8.634 -27.239 -39.109 1.00 37.67 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1113 O THR %1s %3d -8.711 -28.353 -38.592 1.00 37.67 O\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1114 CB THR %1s %3d -7.043 -26.503 -40.875 1.00 38.53 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1115 OG1 THR %1s %3d -7.000 -25.867 -42.150 1.00 40.24 O\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1116 CG2 THR %1s %3d -5.998 -27.594 -40.837 1.00 38.47 C\n",$chain, $aaNo);
} elsif($aaName eq "CYS"){
$aaBlock .= sprintf("ATOM 1506 N CYS %1s %3d -27.462 -32.935 -29.977 1.00 28.60 N\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1507 CA CYS %1s %3d -28.478 -33.651 -29.201 1.00 28.57 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1508 C CYS %1s %3d -28.286 -35.163 -29.155 1.00 27.88 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1509 O CYS %1s %3d -28.482 -35.775 -28.115 1.00 28.07 O\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1510 CB CYS %1s %3d -29.863 -33.351 -29.748 1.00 28.78 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1511 SG CYS %1s %3d -30.355 -31.735 -29.290 1.00 30.85 S\n",$chain, $aaNo);
} elsif($aaName eq "MET"){
$aaBlock .= sprintf("ATOM 1724 N MET %1s %3d -23.545 -34.935 -15.636 1.00 24.99 N\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1725 CA MET %1s %3d -24.821 -35.466 -16.152 1.00 25.11 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1726 C MET %1s %3d -25.919 -34.414 -16.120 1.00 25.36 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1727 O MET %1s %3d -26.743 -34.329 -17.030 1.00 25.52 O\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1728 CB MET %1s %3d -25.279 -36.674 -15.335 1.00 24.73 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1729 CG MET %1s %3d -24.463 -37.905 -15.604 1.00 24.70 C\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1730 SD MET %1s %3d -24.678 -39.148 -14.328 1.00 25.25 S\n",$chain, $aaNo);
$aaBlock .= sprintf("ATOM 1731 CE MET %1s %3d -26.376 -39.651 -14.545 1.00 24.43 C\n",$chain, $aaNo);
} else {
print STDERR "ERROR: $aaName is of unknown AA type!\n";
}
return $aaBlock;
}
##############################################################################
sub pdb2fasta {
(my $pdbFile) = @_;
my @a = split(/\n/,`grep ^ATOM $pdbFile`);
my %uniqAA;
my $seq = "";
my $resCount = 0;
foreach my $l (@a){
if($l=~/^ATOM/){
my $resName = substr($l, 17, 3);
$resName =~ s/\s//g;
my $resNo = substr($l, 22, 4);
$resNo =~ s/\s//g;
my $chainId = substr($l, 21, 1);
my $aa = "$resName-$resNo-$chainId";
if(!exists $uniqAA{$aa}){
$uniqAA{$aa} = $aa;
if(exists $aa3{$resName}){
$seq .= $aa3{$resName};
} else {
print STDERR "WARNING: $resName-$resNo-$chainId is non-standard AA\n";
}
}
}
}
return $seq;
}
##############################################################################
sub readAlignment(){
(my $aln, my $fastaName1, my $fastaName2) = @_;
open(F,$aln) or die "ERROR: Failed to open alignment file \"$aln\"";
my $h1;
my $h2;
while(<F>){
chomp($_);
if(/^[A-Za-z0-9]/){
if(/^$fastaName1/){
$_=~s/.*\d\s([A-Za-z\-])/$1/;
$_=~s/\s+\d+//;
$h1 .= $_;
}
else{
$_=~s/.*\d\s([A-Za-z\-])/$1/;
$_=~s/\s+\d+//;
$h2 .= $_;
}
}
}
close(F);
if(length($h1) != length($h2)){
die("ERROR while reading alignment file. Sequence lengths do not match: ".length($h1)." vs ".length($h2)."\n");
}
return $h2;
}
##############################################################################
sub makeAlignment{
(my $targetSeq, my $templateSeq) = @_;
my $target = "target.fasta";
my $template = "template.fasta";
my $aln = "target-template.aln";
open(O1, ">$target");
print O1 ">target\n$targetSeq\n";
close(O1);
open(O2, ">$template");
print O2 ">template\n$templateSeq\n";
close(O2);
# print "SEQRES:\n$targetSeq\nATOM:\n$templateSeq\n";
my $cmd = "$needle $target $template -gapopen 10 -gapextend 0.5 $aln";
`$cmd`;# or die "ERROR while performing Needleman-Wunsch alignment ($cmd): $!";
return &readAlignment($aln, "target", "template");
}
##############################################################################
sub renumberPDB{
(my $atoms) = @_;
my @atoms = split(/\n/,$atoms);
my $index = 0;
my $resNoIndex = 0;
my $newAtoms = "";
my $preResNo = 0;
foreach my $atom (@atoms){
$index++;
my $flag=substr($atom, 0, 6);
my $resNo = substr($atom,22,4);
my $rest = substr($atom, 11);
if($resNo!=$preResNo){
$resNoIndex++;
$preResNo = $resNo;
}
my $newAtom = sprintf("%s%5s%s\n", $flag, $index, $rest);
substr($newAtom,22,4,sprintf("%4s",$resNoIndex));
$newAtoms .= $newAtom;
}
return $newAtoms;
}
##############################################################################
sub loopLine{
(my $start, my $end, my $cut) = @_;
return sprintf("LOOP %i %i %i 0 1\n", $start, $end, $cut);
}
##############################################################################
### END OF SUBROUTINES########################################################
##############################################################################
############
### MAIN ###
############
if(!defined $pdbFile or
!defined $fastaFile or
!defined $loopFile or
!defined $newPdbFile){
die "Please specify -pdb AND -fasta AND -loop AND -new or type -h";
}
my $pdbSeq = &pdb2fasta($pdbFile);
my $fastaSeq = `grep -v "^>" $fastaFile | tr -d '\n'`;
my $pdbAligned = &makeAlignment($fastaSeq, $pdbSeq);
my %hash = ();
my @pdb = split(/\n/, `grep ^ATOM $pdbFile`);
my %pdb = ();
foreach my $atom (@pdb){
my $resNo = substr($atom, 22, 5);
$resNo =~ s/\s//g;
$pdb{$resNo} .= "$atom\n";
}
my $isGap = 0;
my $gapStart = -1;
my $gapEnd = -1;
my $pdbResNoIndex = 0;
my $loopResNoIndex = 999;
my $loopPdbFile = "";
open(O1, ">$loopFile") or die "ERROR: Failed to create $loopFile!\n";
for(my $i=0; $i<length($pdbAligned); $i++){
my $aa = substr($pdbAligned, $i,1);
if($aa eq "-"){
$loopPdbFile .= &getPDBaa($aa1{substr($fastaSeq,$i,1)},$loopResNoIndex--,"A");
$gapStart = $i+1 if($gapStart == -1);
$isGap = 1;
} else {
$pdbResNoIndex++;
$loopPdbFile .= $pdb{$pdbResNoIndex};
if($isGap == 1){
$gapEnd = $i;
if($gapStart == 1){
# for modeling N-termini loops
print O1 &loopLine($gapStart, $gapEnd+1, $gapEnd+1);
} else {
print O1 &loopLine($gapStart-1, $gapEnd+1, ($gapStart+$gapEnd)/2);
}
}
$gapStart = -1;
$gapEnd = -1;
$isGap = 0;
}
}
# for modeling C-termini loops
if($isGap == 1){
$gapEnd = length($pdbAligned);
print O1 &loopLine($gapStart-1, $gapEnd, $gapStart-1);
}
close(O1);
open(O2, ">$newPdbFile") or die "ERROR: Failed to create $newPdbFile!\n";
print O2 &renumberPDB($loopPdbFile);
close(O2);