! Projet de simulation numérique ! student : Christian Schlatter, Physique 4e année EPFL ! professeur : John Maddocks, DMA EPFL ! ! ********************************************************************** ! ! This release meets the version 2.2 of the PDB format description guide ! which is available on ! http://www.rcsb.org/pdb/docs/format/pdbguide2.2/guide2.2_frame.html ! Future modification of the field lengths (as proposed for the atom ! entry will require a modification of this code. ! I'm using Visual Fortran 6.6A from Compaq (formerly Digital), ! fully Fortran 95 compatible compiler. ! It should also be runnnable on a fully Fortran 90 compiler. ! For release information see module release module release ! Release information character (LEN=25), parameter :: releaseinfo = 'Release 1.31 (2002/04/28)' end module release module var ! Data type definitions type carbons integer :: residue real, dimension(3) :: coordinates character(LEN=1) :: chainident end type carbons end module var module reading ! Subroutines implicit none contains subroutine helices (filename, resolution, alphacarbons, talkmode, atomtype) ! Extracting of alpha helices use var implicit none ! Variables integer :: i, j, k, sequencenumber, integer :: io, oldatom, newatom integer :: numbercarbon, integer :: beginsequence, integer :: endsequence, numberhelix integer, allocatable, dimension(:,:) :: chainseq ! lines : found helices ! begin at sequence end at sequence character (LEN=1) :: test3, ident character (LEN=2) :: helixtype character (LEN=4) :: atomname, seq, beginseq, character (LEN=4) :: endseq, test6, atomtype character (LEN=5) :: res, test4 character (LEN=6) :: atom, helix character (LEN=8) :: x, y, z, test2 character (LEN=9) :: test1 character (LEN=10) :: test5 character (LEN=22) :: test character (LEN=32) :: test32 character (LEN=60) :: molecule character *(*), intent(in) :: filename character (LEN=1), allocatable, dimension(:,:) :: chain ! lines : found helices ! rows : location, orientation real :: xpos, ypos, zpos real, intent(out) :: resolution type(carbons), pointer, dimension(:) :: alphacarbons, dummy logical :: flag, talkmode OPEN (UNIT=1, FILE=filename, FORM='FORMATTED', STATUS='OLD') rewind 1 ! Reading of the PDB file header line read (1, '(a10, a60)', ADVANCE='YES') test5, molecule if (talkmode) then write (*, '(" The following protein is going to be analysed:")') write (*, *) write (*, '(" ", a60)') molecule end if read (1, '(a10, a60)', ADVANCE='YES') test5, molecule if (talkmode) then write (*, '(" ", a60)') molecule write (*, *) end if do ! Reading of the experimental data resolution value read (1, '(a22, a5)', ADVANCE='YES') test, res if (test == 'REMARK 2 RESOLUTION.') then exit endif end do if (res == ' NOT ') then if (talkmode) then write (*, '(" No resolution specified !!! ")') write (*, *) end if else read(res, '(f5.0)') resolution if (talkmode) then write (*, '(" The structure data has a resolution of ", f4.2, " angstroems")') resolution write (*, *) end if end if ! Determination of the number of present alpha helices rewind 1 numberhelix = 0 do read(1, '(a6, a32, a2)', ADVANCE='YES', IOSTAT = io) helix, test32, helixtype if (io < 0) then if ((talkmode) .and. (numberhelix == 0)) then write (*, '(" No alpha helices in this file !!")') write (*, *) return end if exit end if if ((helix == 'HELIX ') .and. ((helixtype == ' 6') .or. (helixtype == ' 1'))) then ! Alpha helix types : 1 means right-handed; 6 means left-handed helix numberhelix = numberhelix + 1 end if end do allocate(chainseq(numberhelix,2)) allocate(chain(numberhelix,2)) ! Determination of location of the alpha helix rewind 1 i = 1 chain = X flag = .false. HEL : do if (i > numberhelix) then ! All alpha helices have been read exit end if read(1, '(a6, a13, a1, a1, a4, a8, a4, a1, a2)', ADVANCE='YES', & IOSTAT = io) helix, test1, ident, test3, beginseq, & test2, endseq, test3, helixtype if ((helix == 'HELIX ') .and. ((helixtype == ' 6') .or. (helixtype == ' 1'))) then chain(i,1) = ident read(beginseq, '(i4)') chainseq(i,1) ! Helix starting sequence number read(endseq, '(i4)') chainseq(i,2) ! Helix stopping sequence number flag = .true. if (helixtype == ' 1') then if (talkmode) then write (*, '(" Right-handed alpha helix found in chain ", a1)') chain(i,1) end if chain(i,2) = 'R' i = i + 1 else if (talkmode) then write (*, '(" Left-handed alpha helix foundin chain ", a1)') chain(i,1) end if chain(i,2) = 'L' i = i + 1 endif endif if ((helix /= 'HELIX ') .and. (flag)) then exit HEL ! End of helix specificator list reached endif end do HEL if (talkmode) then write (*, *) write (*, '(" ", i3, " helices found.")') i - 1 write (*, *) end if j = 1 do k = 1, i - 1 numbercarbon = chainseq(k,2) - chainseq(k,1) + 1 if (associated(alphacarbons)) then ! Extending the variable 'alphacarbons' for the new helix allocate(dummy(size(alphacarbons))) dummy = alphacarbons deallocate(alphacarbons) allocate (alphacarbons(size(dummy) + numbercarbon)) alphacarbons (1 : j-1) = dummy (:) deallocate (dummy) else ! Allocating the variable 'alphacarbons' for the first helix allocate (alphacarbons(numbercarbon)) end if if (talkmode) then write (*, '(" There are ", i10, " carbon atoms located in the & alpha helix of chain ", a1)') & numbercarbon, chain(k,1) write (*, *) write (*, '(" The carbon atoms have the following coordinates:")') write (*, *) write (*, '(" # SEQ x [A] y [A] z [A]")') write (*, *) end if rewind 1 sequencenumber = 0 oldatom = -10 ATM : do ! Reading of the coordinates of the atoms located in the alpha helices read (1, '(a6, a5, a5, a5, a1, a4, a4, a8, a8, a8)', ADVANCE='YES') & atom, test4, atomname, test5, ident, seq, test6, x, y, z if ((atom == 'ATOM ') .and. (ident == chain(k,1))) then read (test4, '(i5)') newatom if ((atomname == atomtype) .and. (newatom == oldatom + 1)) then ! Error in PDB file or strange mutation : The same atom found as neighbours oldatom = newatom cycle end if if (atomname == atomtype) then oldatom = newatom end if read(seq, '(i4)') sequencenumber read(x, '(f8.0)') xpos read(y, '(f8.0)') ypos read(z, '(f8.0)') zpos endif if ((atom == 'ATOM ') .and. (atomname == atomtype) .and. & & (sequencenumber >= chainseq(k,1)) .and. (sequencenumber <= chainseq(k,2))) then ! Atom of type 'atomtype' found inside a helix if (talkmode) then write (*, '(" ", i5, i6, f8.3, f8.3, f8.3)') j, sequencenumber, xpos, ypos, zpos end if alphacarbons(j)%residue = sequencenumber alphacarbons(j)%coordinates(1) = xpos alphacarbons(j)%coordinates(2) = ypos alphacarbons(j)%coordinates(3) = zpos alphacarbons(j)%chainident = ident j = j + 1 endif if (sequencenumber >= chainseq(k,2) .and. (atomname == atomtype)) then ! End of helix reached if (talkmode) then write (*, *) end if exit ATM endif end do ATM end do CLOSE (UNIT=1, STATUS='KEEP') deallocate(chainseq, chain) end subroutine helices subroutine backbones (filename, resolution, alphacarbons, talkmode, atomtype) ! Extracting the backbone of the protein use var implicit none ! Variables integer, parameter :: maxhelices = 10 ! maximum possible number of helices integer :: i, j, k, sequencenumber, io, integer :: oldatom, newatom integer :: oldsequencenumber integer :: numbercarbon, beginsequence, integer :: endsequence, residues integer, dimension(maxhelices,2) :: chainseq ! lines : found helices ! begin at sequence end at sequence character (LEN=1) :: ident character (LEN=4) :: atomname, seq, test6, resi character (LEN=4) :: atomtype character (LEN=5) :: res, test4 character (LEN=6) :: atom, helix, seqres character (LEN=7) :: test1 character (LEN=8) :: x, y, z character (LEN=10) :: test5 character (LEN=22) :: test character (LEN=60) :: molecule character *(*), intent(in) :: filename character (LEN=1), dimension(maxhelices,2) :: chain ! lines : found helices ! rows : location, orientation real :: xpos, ypos, zpos real, intent(out) :: resolution type(carbons), pointer, dimension(:) :: alphacarbons, dummy logical :: flag, talkmode OPEN (UNIT=1, FILE=filename, FORM='FORMATTED', STATUS='OLD') ! Reading of the PDB file header line rewind 1 read (1, '(a10, a60)', ADVANCE='YES') test5, molecule if (talkmode) then write (*, '(" The following protein is going to be analysed:")') write (*, *) write (*, '(" ", a60)') molecule end if read (1, '(a10, a60)', ADVANCE='YES') test5, molecule if (talkmode) then write (*, '(" ", a60)') molecule write (*, *) end if do ! Reading ot the experimental data resolution value read (1, '(a22, a5)', ADVANCE='YES') test, res if (test == 'REMARK 2 RESOLUTION.') then exit endif end do if (res == ' NOT ') then if (talkmode) then write (*, '(" No resolution specified !!! ")') write (*, *) end if else read(res, '(f5.0)') resolution if (talkmode) then write (*, '(" The structure data has a resolution of ", f4.2, " angstroems")') resolution write (*, *) end if end if do ! Determination of the number of residues in the backbone read (1, '(a6, a7, a4)', ADVANCE='YES') seqres, test1, resi if (seqres == 'SEQRES') then exit endif end do read(resi, '(i4)') residues j = 1 allocate (alphacarbons(residues)) if (talkmode) then write (*, '(" There are ", i10, " residues in the backbone.")') residues write (*, *) write (*, '(" The carbon atoms have the following coordinates:")') write (*, *) write (*, '(" # SEQ x [A] y [A] z [A]")') write (*, *) end if rewind 1 sequencenumber = 0 oldatom = -10 ATM : do oldsequencenumber = sequencenumber ! Reading of the coordinates of the atoms read (1, '(a6, a5, a5, a5, a1, a4, a4, a8, a8, a8)', ADVANCE='YES', & IOSTAT = io) atom, test4, atomname, test5, ident, seq, & test6, x, y, z if (atom == 'ATOM ') then read (test4, '(i5)') newatom if ((atomname == atomtype) .and. (newatom == oldatom + 1)) then ! Error in PDB file or strange mutation : The same atom found as neighbours oldatom = newatom cycle end if if (atomname == atomtype) then oldatom = newatom end if read(seq, '(i4)') sequencenumber read(x, '(f8.0)') xpos read(y, '(f8.0)') ypos read(z, '(f8.0)') zpos endif if ((sequencenumber < oldsequencenumber) .or. (io < 0)) then ! If there were more than one atom of the chosen type per ! residue, the 'alphacarbons' variable has to be extended allocate(dummy(oldsequencenumber)) dummy = alphacarbons(1:oldsequencenumber) deallocate(alphacarbons) allocate(alphacarbons(size(dummy))) alphacarbons = dummy deallocate (dummy) write(*, *) exit ATM end if if ((atom == 'ATOM ') .and. (atomname == atomtype)) then ! An atom has been found if (j > residues) then ! Extending the variable 'alphacarbons' if the number of ! residues given in the PDB file header was not correct. allocate(dummy(size(alphacarbons))) dummy = alphacarbons deallocate(alphacarbons) allocate(alphacarbons(size(dummy) + 1)) alphacarbons(1:j-1) = dummy(:) deallocate (dummy) end if alphacarbons(j)%residue = sequencenumber alphacarbons(j)%coordinates(1) = xpos alphacarbons(j)%coordinates(2) = ypos alphacarbons(j)%coordinates(3) = zpos alphacarbons(j)%chainident = ident if (talkmode) then write (*, '(" ", i5, i6, f8.3, f8.3, f8.3)') j, sequencenumber, xpos, ypos, zpos end if j = j + 1 endif if (((sequencenumber == residues) .and. (atomname == atomtype)) .or. (atom == 'TER ')) then ! All atoms have been processed. if (talkmode) then write (*, *) end if if (j < residues) then ! Correction if number of found alphacarbons is smaller ! than the size of its allocated variable allocate(dummy(j-1)) dummy = alphacarbons(1:j-1) deallocate(alphacarbons) allocate(alphacarbons(size(dummy))) alphacarbons(:) = dummy(:) deallocate (dummy) end if exit ATM endif end do ATM CLOSE (UNIT=1, STATUS='KEEP') end subroutine backbones end module reading ! ********************************************************************** program extract ! Main program use reading use var use release implicit none ! Variables character (LEN=12) :: export, dummy character (LEN=3) :: subfolder, input character (LEN=4) :: extension, atomtype character (LEN=6) :: exportfolder character (LEN=12), allocatable, dimension(:) :: filename character, dimension(:), pointer :: name real :: resolution, min real(8) :: initial, final real, allocatable, dimension(:,:) :: allalphacarbons, oldcarbons real, pointer, dimension(:,:) :: alphacircles real, allocatable, dimension(:) :: minradius type(carbons), pointer, dimension(:) :: alphacarbons integer :: i, j, k, numberfiles, io, integer :: position, length integer :: what, resourceflag integer, allocatable, dimension(:,:) :: triple logical :: talkmode logical, dimension(:), allocatable :: goodfiles ! resourceflag = 0 ! either 0 : a single PDB-file specified by filename ! or 1 : a bunch of PDB-files listed in Files.ini ! is loaded. subfolder = 'PDB' ! name of the subdirectory where the PDB-resource ! files are stored in (with extension .ent or .pdb) exportfolder = 'source' ! name of the subfolder for ASCII export files talkmode = .true. ! if true, detailed comments are printed on screen if (talkmode) then write (*, *) write (*, '(" Computational physics project")') write (*, '(" Christian Schlatter, student of physics, 4th year")') write (*, *) releaseinfo write (*, '(" ***************************************************************")') write (*, *) end if ! PDB input file collection write (*, '(" PDB File coordinates extractor")') write (*, *) write (*, '(" How many PDB files would you like to process ?")') write (*, *) write (*, '(" -> Type 1 for a single file or")') write (*, '(" -> Type 2 for a list of files provided in Files.ini")') write (*, *) write (*, '(" Please choose your option now : ")', advance = 'no') read (*,*) resourceflag write (*, *) if (resourceflag == 2) then ! Batch mode using Files.ini write (*, '(" Opening Files.ini... -> ")', advance = 'no') open (UNIT=1, FILE='Files.ini', FORM='FORMATTED', STATUS='OLD') rewind 1 numberfiles = 0 do ! Determination of the number of PDB files read (1, '(a12)', IOSTAT = io) dummy if (io == 0) then numberfiles = numberfiles + 1 else exit end if end do allocate (filename(numberfiles)) rewind 1 do i = 1, numberfiles ! Determination of the PDB file names read (1, '(a12)') filename(i) end do close (UNIT=1, STATUS='KEEP') write (*, '(i10, " files to process")') numberfiles write (*,*) else allocate (filename(1)) ! User provided PDB file name write (*, '(" Please provide the filename of the PDB file : ")', advance = 'no') read (*, *) filename(1) write (*, *) numberfiles = 1 endif allocate (goodfiles(numberfiles)) goodfiles = .true. ! Atom type to be extracted input = ' ' write (*, *) write (*, '(" Which type of atoms would you like to extract ?")') write (*, *) write (*, '(" Example : For alpha carbons type the two characters CA")') write (*, *) write (*, '(" Please choose your atom now : ")', advance = 'no') read (*, *) input atomtype(1:1) = " " do i = 1, 3 atomtype(i+1:i+1) = input(i:i) end do ! Special tertiary configuration selection write (*, *) write (*, '(" What kind of information would you like to extract ?")') write (*, *) write (*, '(" -> Type 1 for coordinates of atoms located in alpha helices.")') write (*, '(" -> Type 2 for coordinates of atoms of the whole backbone of the protein.")') write (*, *) write (*, '(" Please choose your option now : ")', advance = 'no') read (*, *) what do i = 1, numberfiles ! Processing file by file if (talkmode) then write (*, *) write (*, *) write (*, '(" Extracting structure information from file ", a12)') filename(i) write (*, '(" *******************************************************")') write (*, *) write (*,'(" Processing now file ", a12)') filename(i) write (*, *) end if position = scan(filename(i),'.') select case (what) case (1) ! Extracting helix subsequences only call helices(subfolder//'/'//filename(i), resolution, alphacarbons, talkmode, atomtype) filename(i)(position+1:position+1) = 'h' filename(i)(position+2:position+2) = 'e' filename(i)(position+3:position+3) = 'l' case (2) ! Extracting the whole backbone of the protein call backbones(subfolder//'/'//filename(i), resolution, alphacarbons, talkmode, atomtype) filename(i)(position+1:position+1) = 'b' filename(i)(position+2:position+2) = 'c' filename(i)(position+3:position+3) = 'k' case default end select if (.not.(associated(alphacarbons))) then ! This may occur when no alpha helices were present in the selected protein if (talkmode) then write (*, '(" This file will be ignored. Please check if it is applicable.")') write (*, *) pause else write (*, '("File ", a8, " has not been created. No helices present.")') filename(i) end if goodfiles(i) = .false. cycle end if ! Saving the gathered informations to disk write (*, *) write (*, '(" Exporting C-alpha coordinates to file ", a12)') filename(i) if (talkmode) then write (*, '(" **************************************************")') write (*, *) end if open (UNIT=2, FILE=exportfolder//'/'//filename(i), FORM='FORMATTED', STATUS='REPLACE') ! Definition of the C-alpha circle input file header write(2, '("Atom cartesian coordinates from PDB")') write(2, '("C-alpha-circle project. File created using Extractor.")') write(2, '(a25)', advance='no') releaseinfo write(2, '(". © 2002 EPFL Lausanne")') write(2, '("Atom type : ", a4)') atomtype write(2, '("Resolution [Å] : ", f4.2)') resolution write(2, *) write(2, '("Entry x [Å] y [Å] z [Å] residue chain")') write(2, *) do j = 1, size (alphacarbons) write (2, '(i5, 3f10.3, i9, a7)' ) j, alphacarbons(j)%coordinates(1), & alphacarbons(j)%coordinates(2), & alphacarbons(j)%coordinates(3), & alphacarbons(j)%residue, & alphacarbons(j)%chainident end do close (UNIT=2, STATUS='KEEP') deallocate (alphacarbons) end do ! Protocolling of which files were processed sucessfully open (UNIT=3, FILE = 'goodfiles.txt', FORM='FORMATTED', STATUS='REPLACE') do i = 1, numberfiles if (goodfiles(i)) then write (3, *) filename(i) end if end do close (UNIT=3, STATUS='KEEP') deallocate (filename) write(*, *) write(*,'(" DONE ! ")') write(*, *) end program extract