diff --git a/ericscript.pl b/ericscript.pl
new file mode 100755
index 0000000..6e75e07
--- /dev/null
+++ b/ericscript.pl
@@ -0,0 +1,557 @@
+#!/usr/bin/perl
+
+use warnings;
+use strict;
+use Pod::Usage;
+use Getopt::Long;
+use File::Spec;
+use Cwd 'abs_path';
+
+our ($verbose, $help, $man);
+our ($samplename, $reads_1, $reads_2, $outputfolder, $minreads, $removetemp, $nthreads, $MAPQ, $checkdb, $demo, $refid, $printdb, $downdb, $dbfolder, $recalc, $bwa_aln, $genomeref);
+our ($simulator, $readlength, $ntrim, $insize, $sd_insize, $ngenefusion, $min_cov, $max_cov, $nsims, $be, $ie, $background_1, $background_2, $nreads_background, $ensversion);
+our($calcstats, $resultsfolder, $datafolder, $algoname, $dataset, $normroc);
+my @command_line = @ARGV;
+GetOptions('verbose|v'=>\$verbose, 
+'help|h'=>\$help, 
+'man|m'=>\$man, 
+'samplename|name=s'=>\$samplename,
+'outputfolder|o=s'=>\$outputfolder, 
+'dbfolder|db=s'=>\$dbfolder, 
+'background_1=s'=>\$background_1, 
+'background_2=s'=>\$background_2, 
+'refid=s'=>\$refid, 
+'minreads|minr=i'=>\$minreads,
+'remove'=>\$removetemp,
+'nthreads|p=i'=>\$nthreads,
+'readlength|rl=i'=>\$readlength,
+'ntrim=i'=>\$ntrim,
+'insize=f'=>\$insize,
+'sd_insize=f'=>\$sd_insize,
+'ngenefusion=i'=>\$ngenefusion,
+'min_cov=i'=>\$min_cov,
+'max_cov=i'=>\$max_cov,
+'nsims=i'=>\$nsims,
+'nreads_background=i'=>\$nreads_background,
+'checkdb'=>\$checkdb,
+'ie'=>\$ie,
+'be'=>\$be,
+'demo'=>\$demo,
+'simulator'=>\$simulator,
+'recalc'=>\$recalc,
+'printdb'=>\$printdb,
+'checkdb'=>\$checkdb,
+'downdb'=>\$downdb,
+'bwa_aln'=>\$bwa_aln,
+'calcstats'=>\$calcstats,
+'resultsfolder=s'=>\$resultsfolder, 
+'datafolder=s'=>\$datafolder, 
+'algoname=s'=>\$algoname, 
+'dataset=s'=>\$dataset, 
+'normroc=i'=>\$normroc,
+'ensversion=i'=>\$ensversion,
+'MAPQ=f'=>\$MAPQ) or pod2usage ();
+
+$help and pod2usage (-verbose=>1, -exitval=>1, -output=>\*STDOUT);
+$man and pod2usage (-verbose=>2, -exitval=>1, -output=>\*STDOUT);
+
+my $sysmsg;
+my $sysflag=0;
+$sysmsg = qx/samtools 2>&1/ || '';		
+if ($sysmsg !~ m/bamshuf/) {
+	print STDERR "[EricScript] Error: SAMtools >= 0.1.19 not found! Please install and add it to your PATH.\n";
+	$sysflag=1;
+}
+if ($sysmsg =~ m/htslib/) {
+    print STDERR "[EricScript] Error: SAMtools >= 1.0 detected! EricScript is not yet compatible with it. Please use samtools 0.1.19 to run EricScript.\n";
+    $sysflag=1;
+}
+$sysmsg = qx/bwa 2>&1/ || '';		
+if ($sysmsg !~ m/sampe/) {
+	print STDERR "[EricScript] Error: BWA not found! Please install and add it to your PATH.\n";
+	$sysflag=1;
+} else {
+	$sysmsg = qx/bwa mem 2>&1/ || '';
+	if ($sysmsg !~ m/-Y/) {
+	    print STDERR "[EricScript] Error: BWA >= 0.7.12 not found! Please install and add it to your PATH.\n";
+	    $sysflag=1;
+	}
+}
+#} elsif ($sysmsg !~ m/mem/) {
+#    print STDERR "[EricScript] Error: BWA >= 0.7.4 not found! Please install and add it to your PATH.\n";
+#    $sysflag=1;
+#}
+$sysmsg = qx/blat 2>&1/ || '';		
+if ($sysmsg !~ "tileSize") {
+	print STDERR "[EricScript] Error: BLAT not found! Please install and add it to your PATH.\n";
+	$sysflag=1;
+}
+$sysmsg = qx/R --help 2>&1/ || '';		
+if ($sysmsg !~ "Rdiff") {
+	print STDERR "[EricScript] Error: R not found! Please install and add it to your PATH.\n";
+	$sysflag=1;
+}
+$sysmsg = qx/seqtk 2>&1/ || '';
+if ($sysmsg !~ "subseq") {
+    print STDERR "[EricScript] Error: Seqtk not found! Please install and add it to your PATH.\n";
+    $sysflag=1;
+}
+$sysmsg = qx/bedtools 2>&1/ || '';
+if ($sysmsg !~ "sample") {
+    print STDERR "[EricScript] Error: bedtools (>=2.18) not found! Please install and add it to your PATH.\n";
+    $sysflag=1;
+}
+
+if ($sysflag == 1) {
+	exit(100);
+}
+
+my $myscript = abs_path($0);
+my $myscriptname = $myscript; $myscriptname =~ s%.*/%%g;
+my $ericscriptfolder=substr $myscript, 0, (length($myscript) - length($myscriptname) - 1) ;
+
+$calcstats ||= 0;
+$simulator ||= 0;
+$checkdb ||= 0;
+$demo ||= 0;
+$printdb ||=0;
+$downdb ||=0;
+$recalc ||=0;
+$bwa_aln ||= 0;
+$ensversion ||=0;
+#if ($bwa_aln != 0) {
+#	$bwa_aln = 1;
+#}
+$dbfolder ||= "$ericscriptfolder/lib";
+
+if ( ($ensversion > 0) && ($ensversion < 70) ) {
+	pod2usage ("[EricScript] Error in selecting Ensembl release. Minimum supported version is 70.\n");
+}
+
+if ($downdb != 0) {
+	$refid or pod2usage ("[EricScript] Syntax error. \"refid\" parameter needs to be specified. Run ericscript.pl --printdb to see the list of available refid.\n");
+} else {
+	$refid ||= "homo_sapiens";
+}
+
+if ($printdb != 0) {
+    system("R --slave --args $ericscriptfolder,$dbfolder,$ensversion,$printdb < $ericscriptfolder/lib/R/RetrieveRefId.R");
+	exit(100);
+}
+
+if ($recalc != 0) {
+    system("R --slave --args $ericscriptfolder,$refid,$dbfolder < $ericscriptfolder/lib/R/CheckDB.R");
+    my $flagdb;
+    open FILE, "< $ericscriptfolder/lib/data/_resources/.flag.dbexists";
+    $flagdb = <FILE>;
+    if ($flagdb == 0) {
+        pod2usage ();
+    }
+
+     	## check inputs !! ONLY FOR DEBUG purposes
+    my $file3 = File::Spec->catfile ($genomeref);
+    -f $file3 or pod2usage ("[EricScript] Error: please specify a genome reference file.\n");
+    my $file4 = File::Spec->catfile ("$genomeref.pac");
+    -f $file4 or pod2usage ("[EricScript] Error: BWA indexes for $genomeref not found. Create BWA indexes then run EricScript.\n");
+	$outputfolder or pod2usage ("[EricScript] Error: Please specify where past analysis is stored by using --outputfolder");
+#	$outputfolder = abs_path($outputfolder);
+	if (-d $outputfolder) { 
+		my $abs_outputfolder = abs_path($outputfolder);
+		system("R --slave --args $ericscriptfolder,$abs_outputfolder,$dbfolder,$refid,$genomeref < $ericscriptfolder/lib/R/CalcBreakpointPositions.R");	
+    } else {
+        die "[EricScript] Error: output folder $outputfolder does not exist.\n";
+	}
+
+}
+
+if ($checkdb == 0 && $demo == 0 && $simulator == 0 && $calcstats == 0 && $downdb == 0 && $recalc == 0) {
+	
+	@ARGV or pod2usage (-verbose=>0, -exitval=>1, -output=>\*STDOUT);
+	@ARGV == 2 or pod2usage ("[EricScript] Syntax error.\n");
+	($reads_1, $reads_2) = @ARGV;
+
+		## check db existence
+	system("R --slave --args $ericscriptfolder,$refid,$dbfolder < $ericscriptfolder/lib/R/CheckDB.R");
+	my $flagdb;
+	open FILE, "< $ericscriptfolder/lib/data/_resources/.flag.dbexists";
+	$flagdb = <FILE>;
+	if ($flagdb == 0) {
+		exit(100);
+	}	
+
+		## check inputs
+	my $file1 = File::Spec->catfile ($reads_1);
+	-f $file1 or pod2usage ("[EricScript] Error: the required file $reads_1 does not exist.\n");
+	my $file2 = File::Spec->catfile ($reads_2);
+    -f $file2 or pod2usage ("[EricScript] Error: the required file $reads_2 does not exist.\n");
+#	my $file3 = File::Spec->catfile ($genomeref);
+#    -f $file3 or pod2usage ("[EricScript] Error: please specify a valid genome reference file.\n");
+#	my $file4 = File::Spec->catfile ("$genomeref.pac");
+#    -f $file4 or pod2usage ("[EricScript] Error: BWA indexes for $genomeref not found. Create BWA indexes then run EricScript.\n");
+
+
+	my $userhome = $ENV{HOME}; 
+	$samplename ||= 'MyEric';
+	$outputfolder ||= "$userhome/$samplename";
+#	$outputfolder = abs_path($outputfolder);
+#	if (-d $outputfolder) {
+#		die "[EricScript] Error: output folder $outputfolder already exists.\n";
+#	}
+	if (! -d $outputfolder) { mkdir($outputfolder) || die "[EricScript] Error: the directory $outputfolder is not writable by the current user. \n"; }
+	if (! -d "$outputfolder/aln") { mkdir("$outputfolder/aln"); }
+	if (! -d "$outputfolder/out") { mkdir("$outputfolder/out"); }
+	$verbose ||= 0;			
+	$minreads ||= 3;
+	$ntrim ||= -1;
+	$removetemp ||= 0;
+	$MAPQ ||= 20;
+	$nthreads ||= 4;
+	my $myref="$dbfolder/data/$refid/EnsemblGene.Reference.fa";
+	my $mynewref="$outputfolder/out/$samplename.EricScript.junctions.fa";
+	my $mynewref_recal="$outputfolder/out/$samplename.EricScript.fa";
+	my $flagbin= 1;
+	if (-T $reads_1) {
+		$flagbin = 0;
+	}
+	## write vars
+	my $abs_outputfolder = abs_path($outputfolder);
+	my $range = 10000;
+	my $rnum = int(rand($range));
+	my $varfile = "$outputfolder/ericscript.vars";
+	open(FILE, ">", "$varfile") or die "Couldn't open: $!";
+	print FILE "samplename=\"$samplename\"\n";
+	print FILE "outputfolder=\"$abs_outputfolder\"\n";
+	print FILE "dbfolder=\"$dbfolder\"\n";
+	print FILE "reads_1=\"$reads_1\"\n";
+	print FILE "reads_2=\"$reads_2\"\n";
+	print FILE "minreads=$minreads\n";
+	print FILE "ntrim=$ntrim\n";
+	print FILE "MAPQ=$MAPQ\n";
+	print FILE "removetemp=$removetemp\n";
+	print FILE "flagbin=$flagbin\n";
+	print FILE "nthreads=$nthreads\n";
+	print FILE "ericscriptfolder=\"$ericscriptfolder\"\n";
+	print FILE "myref=\"$myref\"\n";
+	print FILE "mynewref=\"$mynewref\"\n";
+	print FILE "mynewref_recal=\"$mynewref_recal\"\n";
+	print FILE "refid=\"$refid\"\n";
+	print FILE "verbose=$verbose\n";
+	print FILE "bwa_aln=$bwa_aln\n";
+	close (FILE); 
+	
+	system("cp", "$varfile", "$outputfolder/out/.ericscript.vars");
+	system("bash", "$ericscriptfolder/lib/bash/RunEric.sh", "$outputfolder");
+} elsif ($checkdb != 0) {
+	if (-d $dbfolder) {
+		print STDERR "[EricScript] Checking installed Database.\n";
+		system("R --slave --args $ericscriptfolder,$dbfolder,$ensversion,$printdb < $ericscriptfolder/lib/R/RetrieveRefId.R");
+		system("R --slave --args $ericscriptfolder,$dbfolder < $ericscriptfolder/lib/R/UpdateDB.R");
+		exit(100);
+	} else {
+		die "[EricScript] Error: the directory $dbfolder does not exist.\n";
+	}
+} elsif ($downdb != 0) {
+	if ( -d $dbfolder ) {
+		if ( !-d "$dbfolder/data" ) {
+			mkdir ("$dbfolder/data");
+		}
+	system("R --slave --args $ericscriptfolder,$dbfolder,$ensversion,$printdb < $ericscriptfolder/lib/R/RetrieveRefId.R");
+	system("bash $ericscriptfolder/lib/bash/BuildSeq.sh $ericscriptfolder $refid $dbfolder $ensversion");	
+	} else {
+		die "[EricScript] Error: the directory $dbfolder does not exist.\n";
+	}
+
+} elsif ($demo != 0) {
+        ## check db
+#    my $file3 = File::Spec->catfile ($genomeref);
+#    -f $file3 or die ("[EricScript] Error: please specify a genome reference file.\n");
+    system("R --slave --args $ericscriptfolder,$refid,$dbfolder < $ericscriptfolder/lib/R/CheckDB.R");
+    my $flagdb;
+    open FILE, "< $ericscriptfolder/lib/data/_resources/.flag.dbexists";
+    $flagdb = <FILE>;
+    if ($flagdb == 0) {
+        pod2usage ();
+    }
+	my $userhome = $ENV{HOME}; 
+	$samplename ='demo';
+	$outputfolder ||= "$userhome/ericscript_demo";
+#    $outputfolder = abs_path($outputfolder);
+#	if (-d $outputfolder) {
+#		die "[EricScript] Error: output folder $outputfolder already exists.\n";
+#	}	
+	if (! -d $outputfolder) { mkdir($outputfolder) || die "[EricScript] Error: the directory $outputfolder is not writable by the current user. \n"; }
+	mkdir("$outputfolder/aln");
+	mkdir("$outputfolder/out");	
+	$verbose=0;			
+	$minreads ||= 3;
+	$ntrim ||= -1;
+	$removetemp ||= 0;
+	$MAPQ ||= 20;
+	$nthreads ||= 4;
+	$refid ||= "homo_sapiens";
+	my $myref="$dbfolder/data/$refid/EnsemblGene.Reference.fa";
+	my $mynewref="$outputfolder/out/$samplename.EricScript.junctions.fa";
+	my $mynewref_recal="$outputfolder/out/$samplename.EricScript.fa";
+	my $reads_1="$ericscriptfolder/lib/demo/myreads_1.fq.gz";
+	my $reads_2="$ericscriptfolder/lib/demo/myreads_2.fq.gz";	
+	my $flagbin= 1;
+	if (-T $reads_1) {
+		$flagbin = 0;
+	}
+	my $abs_outputfolder = abs_path($outputfolder);
+    my $range = 10000;
+    my $rnum = int(rand($range));
+    my $varfile = "$outputfolder/ericscript.vars";
+	open(FILE, ">", "$varfile") or die "Couldn't open: $!";
+	print FILE "samplename=\"$samplename\"\n";
+	print FILE "outputfolder=\"$abs_outputfolder\"\n";
+	print FILE "dbfolder=\"$dbfolder\"\n";
+	print FILE "reads_1=\"$reads_1\"\n";
+	print FILE "reads_2=\"$reads_2\"\n";
+	print FILE "flagbin=$flagbin\n";
+	print FILE "minreads=$minreads\n";
+	print FILE "ntrim=$ntrim\n";
+	print FILE "MAPQ=$MAPQ\n";
+	print FILE "removetemp=$removetemp\n";
+	print FILE "nthreads=$nthreads\n";
+	print FILE "ericscriptfolder=\"$ericscriptfolder\"\n";
+	print FILE "myref=\"$myref\"\n";
+	print FILE "mynewref=\"$mynewref\"\n";
+	print FILE "mynewref_recal=\"$mynewref_recal\"\n";
+	print FILE "refid=\"$refid\"\n";
+	print FILE "verbose=$verbose\n";
+	print FILE "bwa_aln=$bwa_aln\n";
+	close (FILE); 
+	
+	system("cp", "$varfile", "$outputfolder/out/.ericscript.vars");
+	system("bash", "$ericscriptfolder/lib/bash/RunEric.sh", "$outputfolder");
+	
+} 
+elsif ($simulator != 0) {
+	$sysmsg = qx/wgsim --help 2>&1/ || '';	
+	my $userhome = $ENV{HOME}; 
+	if ($sysmsg !~ "outer") {
+    	print STDERR "[EricScript] Error: wgsim not found! Please install and add it to your PATH.\n";
+    	$sysflag=1;
+	}
+	if ($sysflag == 1) {
+    	exit(100);
+	}
+    $outputfolder ||= "$userhome/ericscript_simulator";
+#    $outputfolder = abs_path($outputfolder);
+#	if (-d $outputfolder) {  
+#        die "[EricScript] Error: output folder $outputfolder already exists.\n";
+#   }
+print "line 345;";
+    if (! -d $outputfolder ) { mkdir($outputfolder) || die "[EricScript] Error: the directory $outputfolder is not writable by the current user. \n"; }
+print "line 347 ($outputfolder);";
+    $verbose ||= 0;
+    $readlength ||= 75;
+	$insize ||= 200;
+	$sd_insize ||= 50;
+	$ngenefusion ||= 50;
+	$min_cov ||= 1;
+	$max_cov ||= 50;
+	$nsims ||= 10;
+	$be ||= 0;
+	$ie ||= 0;
+	$refid ||= "homo_sapiens";
+	if ($be == 0 && $ie == 0) {
+		$ie=1;
+	}
+	$background_1 ||= 0;
+	$background_2 ||= 0;
+	$nreads_background ||= 200000;
+	my $abs_outputfolder = abs_path($outputfolder);
+	my $simcommand="R --slave --args $readlength,$abs_outputfolder,$ericscriptfolder,$verbose,$insize,$sd_insize,$ngenefusion,$min_cov,$max_cov,$nsims,$be,$ie,$background_1,$background_2,$nreads_background,$dbfolder,$refid < $ericscriptfolder/lib/R/SimulateFusions.R";
+	system($simcommand);	
+
+}
+elsif ($calcstats != 0) {
+
+	my $userhome = $ENV{HOME}; 
+	$outputfolder ||= "$userhome/ericscript_stats";
+#    $outputfolder = abs_path($outputfolder);
+  	if (-d $outputfolder) {  
+#  	    die "[EricScript] Error: output folder $outputfolder already exists.\n";
+ 	} else {
+	    mkdir($outputfolder) || die "[EricScript] Error: the directory $outputfolder is not writable by the current user. \n";	
+	}
+	$resultsfolder || pod2usage ("[EricScript] Error: Argument resultsfolder is not specified! \n");
+	$datafolder || pod2usage ("[EricScript] Error: Argument datafolder is not specified! \n");
+	$algoname || pod2usage ("[EricScript] Error: Argument algoname is not specified! \n");
+	$dataset || pod2usage ("[EricScript] Error: Argument dataset is not specified! \n");
+	$readlength || pod2usage ("[EricScript] Error: Argument readlength is not specified! \n");
+	$normroc ||= 1;
+    my $abs_outputfolder = abs_path($outputfolder);
+	-e $resultsfolder ||  die "[EricScript] Error: the folder $resultsfolder does not exist. \n";
+	-e $datafolder || die "[EricScript] Error: the folder $datafolder does not exist. \n";
+	my $datafolder1="$datafolder/$dataset";
+	-e $datafolder1 || die "[EricScript] Error: no $dataset synthetic data have been found in $datafolder. \n";
+
+	my $calcstatscommand="R --slave --args $resultsfolder,$abs_outputfolder,$datafolder,$algoname,$dataset,$readlength,$normroc,$ericscriptfolder < $ericscriptfolder/lib/R/CalcStats.R";
+	system($calcstatscommand);
+
+}
+
+
+=head1 SYNOPSIS
+ 
+ ericscript.pl [arguments] <reads_1.fq(.gz)> <reads_2.fq(.gz)>
+ 
+	Optional arguments:
+	-h, --help                      print help message
+	-m, --man                       print complete documentation
+	-v, --verbose                   use verbose output
+	-name, --samplename <string>	what's the name of your sample?
+	-o, --outputfolder <string>	where the results will be stored
+	-db, --dbfolder <string>	where database is stored. Default is ERICSCRIPT_FOLDER/lib/
+	-minr, --minreads <int>		minimum reads to consider discordant alignments [3]
+	-p, --nthreads <int>		number of threads for the bwa aln process [4]
+	-ntrim <int>			trim PE reads from 1st base to $ntrim. Default is no trimming. Set ntrim=0 to don't trim reads.
+	--MAPQ <int>			minimum value of mapping quality to consider discordant reads. For MAPQ 0 use a negative value [20]
+	--remove			remove all temporary files.
+	--demo				Run a demonstration of EricScript on simulated reads.
+ 	--refid				Genome reference identification. Run ericscript.pl --printdb to see available refid [homo_sapiens].
+	--bwa_aln			Use BWA ALN instead of BWA MEM to search for discordant reads.
+
+	Subcommands:
+	--checkdb			Check if your database is up-to-date, based on the latest Ensembl release.
+	--downdb			Download, build database. refid parameter need to be specified.
+	--simulator			Generate synthetic gene fusions with the same recipe of the ericscript's paper
+	--calcstats			Calculate the statistics that we used in our paper to evaluate the performance of the algorithms.
+	
+	--------
+	arguments for databases subcommands (downdb, checkdb):
+
+ 		-db, --dbfolder <string>	where database is stored. Default is ERICSCRIPT_FOLDER/lib/
+ 		--refid				Genome reference identification. Run ericscript.pl --printdb to see available refid [homo_sapiens].
+ 		--printdb			Print a list of available genomes and exit.
+		--ensversion		Download data of a specific Ensembl version (>= 70). Default is the latest one.
+ 
+	-------
+	arguments for simulator:
+ 		-o, --outputfolder <string>	where synthetic datasets will be stored [HOME/ericscript_simulator]
+		-rl, --readlength <int>		length of synthetic reads [75]
+	 	--refid				Genome reference identification. Run ericscript.pl --printdb to see available refid [homo_sapiens].
+		-v, --verbose               	use verbose output
+		--insize			parameter of wgsym. Outer distance between the two ends [200]
+		--sd_insize			parameter of wgsym. Standard deviation [50]
+		--ngenefusion			The number of synthetic gene fusions per dataset? [50]
+		--min_cov			Minimum coverage to simulate [1]
+		--max_cov			Maximum coverage to simulate [50]
+		--nsims				The number of synthetic datasets to simulate [10]
+		--be				Use --be to generate Broken Exons (BE) data [no]
+		--ie 				Use --ie to generate Intact Exons (IE) data [yes]
+		-db, --dbfolder			where database is stored. Default is ERICSCRIPT_FOLDER/lib/ 
+		--background_1			Fastq file (forward)  for generating background reads. 
+		--background_2			Fastq file (reverse) for generating background reads. 
+		--nreads_background		The number of reads to extract from background data [200e3].
+
+	-------
+	arguments for calcstats:
+ 		-o, --outputfolder <string>	where statistics file will be stored [HOME/ericscript_calcstats]
+		--resultsfolder <string>	path to folder containing algorithm results.
+		--datafolder <string>		path to folder containing synthetic data generated by ericscript simulator.
+		--algoname <string>		name of the algorithm that generated results. 
+		--dataset <string>		type of synthetic data to considered for calculating statistics. IE or BE? 
+		-rl, --readlength <int>		length of synthetic reads 
+		--normroc <int>		 	factor to normalize the score given by the algorithm.
+			
+ ericscript.pl automatically runs a pipeline to detect chimeric transcripts in
+ paired-end RNA-seq samples. It is also able to generate datasets with synthetic gene fusions.
+ More information about running EricScript Simulator and EricScript CalcStats can be 
+ found at http://ericscript.sourceforge.net
+ 
+ Version: 0.5.5b
+ 
+=head1 OPTIONS
+ 
+=over 8
+
+=item B<--help>
+ 
+ print a brief usage message and detailed explanation of options.
+ 
+=item B<--man>
+ 
+ print the complete manual of the program.
+ 
+=item B<--verbose>
+ 
+ use verbose output.
+ 
+ 
+=item B<--samplename>
+ 
+ Choose a name for your sample. Default is "MyEric"
+ 
+=item B<--outputfolder>
+ 
+ Folder that will contain all the results of the analysis. Default is YOUR_HOME/SAMPLENAME
+ 
+=item B<--dbfolder>
+ 
+ Folder that contains transcriptome sequences and information of the downloaded species. Default is
+ ERICSCRIPT_FOLDER/lib
+
+=item B<--minreads>
+ 
+ Minimum reads to consider discordant alignments. Default is 3 reads with minimum MAPQ.
+
+=item B<-ntrim>
+ 
+ trim PE reads from 1st base to $ntrim. Trimmed reads will be used only for the first alignment (identification 
+ of discordant reads). Setting ntrim to values lower than orginal read length allows EricScript to 
+ increase its sensitivity, especially when the length of reads is 75nt or 100 nt. 
+ Default is no trimming. Set ntrim=0 to don't trim reads.
+ 
+=item B<--nthreads >
+ 
+ Number of threads for the bwa aln process.
+ 
+=item B<--MAPQ >
+ 
+ minimum value of mapping quality to consider discordant reads. For MAPQ 0 use a negative value. Default is 20.
+ 
+=item B<--remove>
+ 
+ remove all temporary files. By default, all temporary files will be kept for 
+ user inspection, but this will easily clutter the directory.
+ 
+=item B<--checkdb>
+ 
+ Check if your database is up-to-date, based on the latest Ensembl release.
+
+=item B<--downdb>
+ 
+ Download, build database. refid parameter need to be specified.
+
+=item B<--refid>
+
+ Genome reference identification. Run ericscript.pl --printdb to see available refid.[homo_sapiens]
+
+=item B<--ensversion>
+    
+ Download data of a specific version of Ensembl. Default is downloading the latest version of Ensembl.
+ Minimum supported version is 70.
+
+=item B<--printdb>
+
+ Print a list of available genomes and exit.
+
+=item B<--demo>
+ 
+ Run a demonstration of EricScript on simulated reads.
+ 
+=back
+ 
+=head1 DESCRIPTION
+ 
+ EricScript (chimEric tranScript Detection Algorithm) is a computational framework for the discovery of 
+ gene fusion products in paired end RNA-seq data.
+ EricScript is freely available to the community for non-commercial use under GPLv3 license. For 
+ questions or comments, please contact matteo.benelli@gmail.com.
+ 
+ 
+=cut
diff --git a/lib/R/BuildExonUnionModel.R b/lib/R/BuildExonUnionModel.R
new file mode 100644
index 0000000..72ca1bf
--- /dev/null
+++ b/lib/R/BuildExonUnionModel.R
@@ -0,0 +1,93 @@
+vars.tmp <- commandArgs()
+vars <- vars.tmp[length(vars.tmp)]
+split.vars <- unlist(strsplit(vars, ","))
+ericscriptfolder <- split.vars [1]
+refid <- split.vars[2]
+dbfolder <- split.vars [3]
+tmpfolder <- split.vars [4]
+
+
+formatfasta <- function(myfasta, step = 50) {  
+  totalchar <- nchar(myfasta)
+  if (totalchar > step) {
+    steps <- seq(1, totalchar, by = step)
+    newfasta <- rep("", (length(steps) - 1))
+    for (j in 1: (length(steps) - 1)) {
+      aa <- substr(myfasta, steps[j], (steps[j] + (step - 1)))
+      newfasta[j] <- aa 
+    }
+    if ((totalchar - tail(steps, n = 1)) > 0) {
+      newfasta <-  c(newfasta, substr(myfasta, steps[j+1], totalchar))
+    }
+  } else
+  {
+    newfasta <- substr(myfasta, 1, totalchar)
+  }
+  return(newfasta)
+}
+
+convertToComplement <- function(x) {
+  
+  bases <- c("A", "C", "G", "T")
+  #xx <- unlist(strsplit(toupper(x), NULL))
+  xx <- rev(unlist(strsplit(toupper(x), NULL)))
+  paste(unlist(lapply(xx, function(bbb) {
+    if (bbb=="A") compString <- "T"
+    if (bbb=="C") compString <- "G"
+    if (bbb=="G") compString <- "C"
+    if (bbb=="T") compString <- "A"
+    if (!bbb %in% bases) compString <- "N"
+    return(compString)
+  })), collapse="")
+  
+}
+
+refid.folder <- file.path(dbfolder, "data", refid)
+if (file.exists(refid.folder) == F) {
+  dir.create(refid.folder)
+}
+x <- scan(file.path(tmpfolder, "subseq.fa"), what = "", quiet = T)
+x.bed <- read.delim(file.path(tmpfolder, "exonstartend.mrg.txt"), sep = "\t", header = F)
+refid.bed <- paste(as.character(x.bed[[1]]), paste((as.numeric(as.character(x.bed[[2]])) + 1), as.character(x.bed[[3]]), sep = "-"), sep = ":")
+tmp <- grep(">", x)
+genomicreg <- substr(x[tmp], 2, nchar(x[tmp]))
+sequences.tmp <- rep("", length(tmp))
+for (i in 1: (length(tmp) - 1)) { 
+  sequences.tmp[i] <- gsub(", ", "", toString(x[(tmp[i] + 1):(tmp[i+1] - 1)]))
+}
+sequences.tmp[length(tmp)] <- gsub(", ", "", toString(x[(tmp[length(tmp)] + 1): length(x)]))
+genenames.tmp <- as.character(x.bed[[4]])
+unique.genenames <- unique(genenames.tmp)
+strand.tmp <- read.delim(file.path(tmpfolder, "strand.txt"), sep = "\t", header = F)
+strand <- strand.tmp[[2]]
+sequences <- rep("", length(unique.genenames))
+for (i in 1: length(unique.genenames)) {
+    genenames1 <- paste(">", unique.genenames[i], sep = "")
+    ix.gene <- which(genenames.tmp == unique.genenames[i])
+    ix.refid <- which(genomicreg %in% refid.bed[ix.gene])
+    if (strand[i] == "-1") {
+      seqtmp0 <- gsub(", ", "", toString(sequences.tmp[ix.refid]))
+      sequences[i] <- convertToComplement(seqtmp0)
+    } else {
+      sequences[i] <- gsub(", ", "", toString(sequences.tmp[ix.refid]))
+    }
+    if (nchar(sequences[i])  == 0) {
+      sequences[i] <- "NNNNNNN"
+    }
+      
+    if (i == 1) {
+      cat(genenames1, file = file.path(refid.folder, "EnsemblGene.Reference.fa"), append = F, sep = "\n")
+    } else {
+      cat(genenames1, file = file.path(refid.folder, "EnsemblGene.Reference.fa"), append = T, sep = "\n")
+    }
+    cat(formatfasta(sequences[i]), file = file.path(refid.folder, "EnsemblGene.Reference.fa"), append = T, sep = "\n")  
+}
+ix.emptyseq <- which(nchar(sequences) == 0) 
+GeneNames <- unique.genenames
+if (length(ix.emptyseq) > 0) {
+GeneNames <- GeneNames[-ix.emptyseq]
+sequences <- sequences[-ix.emptyseq]
+}
+save(GeneNames, file = file.path(refid.folder, "EnsemblGene.GeneNames.RData"))
+save(sequences, file = file.path(refid.folder, "EnsemblGene.Sequences.RData"))
+
diff --git a/lib/R/BuildFasta.R b/lib/R/BuildFasta.R
new file mode 100644
index 0000000..451571c
--- /dev/null
+++ b/lib/R/BuildFasta.R
@@ -0,0 +1,101 @@
+vars.tmp <- commandArgs()
+vars <- vars.tmp[length(vars.tmp)]
+split.vars <- unlist(strsplit(vars, ","))
+samplename <- split.vars [1]
+outputfolder <- split.vars[2]
+ericscriptfolder <- split.vars[3]
+readlength <- max(as.numeric(split.vars[4]))
+refid <- as.character(split.vars[5])
+dbfolder <- as.character(split.vars[6])
+
+
+formatfasta <- function(myfasta, step = 50) {
+	
+	totalchar <- nchar(myfasta)
+	if (totalchar > step) {
+		steps <- seq(1, totalchar, by = step)
+		newfasta <- rep("", (length(steps) - 1))
+		for (j in 1: (length(steps) - 1)) {
+			aa <- substr(myfasta, steps[j], (steps[j] + (step - 1)))
+			newfasta[j] <- aa 
+		}
+		if ((totalchar - tail(steps, n = 1)) > 0) {
+			newfasta <-  c(newfasta, substr(myfasta, steps[j+1], totalchar))
+		}
+	} else
+	{
+		newfasta <- substr(myfasta, 1, totalchar)
+	}
+	return(newfasta)
+}
+
+
+load(file.path(outputfolder,"out", paste(samplename,".chimeric.RData", sep = "")))
+load(file.path(dbfolder, "data", refid, "EnsemblGene.GeneNames.RData"))
+load(file.path(dbfolder, "data", refid, "EnsemblGene.Sequences.RData"))
+load(file.path(dbfolder, "data", refid, "EnsemblGene.Structures.RData"))
+load(file.path(outputfolder, "out", paste(samplename,".chimeric.RData", sep = "")))
+id1 <- MyGF$id1
+id2 <- MyGF$id2
+junctions <- rep(NA, length(id1))
+ids_fasta <- rep("", length(id1))
+sequences.fasta <- rep("", length(id1))
+fasta.file <- c()
+maxgap <- 300
+for (i in 1: length(id1)) {
+	ix.genetable1 <- which(EnsemblGene.Structures$EnsemblGene == id1[i])
+	ix.genetable2 <- which(EnsemblGene.Structures$EnsemblGene == id2[i])
+	ix.gene1 <- which(GeneNames == id1[i])
+	ix.gene2 <- which(GeneNames == id2[i])
+	min.pos1 <- min(MyGF$pos1[[i]]) - 2*readlength
+	max.pos1 <- max(MyGF$pos1[[i]]) + readlength - 1
+	if (min.pos1 < 1) {min.pos1 <- 1}
+	min.pos2 <- min(MyGF$pos2[[i]])
+	max.pos2 <- max(MyGF$pos2[[i]]) + 2*readlength
+	a <- as.numeric(unlist(strsplit(as.character(EnsemblGene.Structures$exonStart[ix.genetable1]), ",")))
+	b <- as.numeric(unlist(strsplit(as.character(EnsemblGene.Structures$exonEnd[ix.genetable1]), ",")))
+	strand1 <- as.character(EnsemblGene.Structures$Strand[ix.genetable1])
+	if (strand1 == "+") {
+		tmp.sum1 <- cumsum((b - a ))
+	} else {
+		tmp.sum1 <- cumsum(rev(b - a))		
+	}
+	exonenumber1 <- which(tmp.sum1 >= max.pos1)[1]
+	if (is.na(exonenumber1)) {exonenumber1 <- length(tmp.sum1)}
+	a2 <- as.numeric(unlist(strsplit(as.character(EnsemblGene.Structures$exonStart[ix.genetable2]), ",")))
+	b2 <- as.numeric(unlist(strsplit(as.character(EnsemblGene.Structures$exonEnd[ix.genetable2]), ",")))
+	strand2 <- as.character(EnsemblGene.Structures$Strand[ix.genetable2])
+	if (strand2 == "+") {
+		tmp.sum2 <- cumsum((b2 - a2))
+	} else {
+		tmp.sum2 <- cumsum(rev(b2 - a2))
+	}
+	exonenumber2 <- which(tmp.sum2 >= min.pos2)[1]
+	if (is.na(exonenumber2)) {exonenumber2 <- length(tmp.sum2)}
+	id.gf1 <- paste(id1[i], exonenumber1, sep = "_")
+	fasta.gf1.tmp0 <- sequences[ix.gene1]
+	start.end.exons <- c(0,tmp.sum1)
+	fasta.gf1 <- substr(fasta.gf1.tmp0, min.pos1, (max.pos1 + maxgap - 1))	
+	id.gf2 <- paste(id2[i], exonenumber2, sep = "_")
+	fasta.gf2.tmp0 <- sequences[ix.gene2]
+	if (max.pos2 > nchar(fasta.gf2.tmp0)) {max.pos2 <- nchar(fasta.gf2.tmp0)}
+	start.end.exons <- c(0,tmp.sum2)
+	fasta.gf2 <- substr(fasta.gf2.tmp0, (min.pos2 - maxgap), max.pos2)
+	id.fastaGF <- paste(">",id.gf1,"----",id.gf2," junction@",nchar(fasta.gf1),sep = "")
+	sequences.fasta[i] <- paste(fasta.gf1, fasta.gf2, sep = "")
+	fasta.gf12 <- formatfasta(sequences.fasta[i])
+	ids_fasta[i] <- paste(id.gf1,id.gf2, sep = "----")
+	junctions[i] <- nchar(fasta.gf1)
+	fastaGF <- c(id.fastaGF, fasta.gf12)
+	fasta.file <- c(fasta.file, fastaGF)
+}
+save(junctions, file = file.path(outputfolder, "out", paste(samplename,".junctions.RData", sep = "")))
+save(sequences.fasta, file = file.path(outputfolder, "out", paste(samplename,".sequences_fasta.RData", sep = "")))
+save(ids_fasta, file = file.path(outputfolder, "out",  paste(samplename, ".ids_fasta.RData", sep = "")))
+cat(fasta.file, file = file.path(outputfolder,"out", paste(samplename,".EricScript.junctions.fa",sep = "")), sep = "\n")
+
+
+
+
+
+
diff --git a/lib/R/BuildNeighbourhoodSequences.R b/lib/R/BuildNeighbourhoodSequences.R
new file mode 100644
index 0000000..028ad01
--- /dev/null
+++ b/lib/R/BuildNeighbourhoodSequences.R
@@ -0,0 +1,37 @@
+vars.tmp <- commandArgs()
+vars <- vars.tmp[length(vars.tmp)]
+split.vars <- unlist(strsplit(vars, ","))
+samplename <- split.vars [1]
+outputfolder <- split.vars[2]
+z <- read.delim(file.path(outputfolder,"out",paste(samplename,".intervals.pileup", sep = "")), sep = "\t", header = F)
+load(file.path(outputfolder,"out",paste(samplename,".ids_filtered.RData", sep = "")))
+load(file.path(outputfolder,"out",paste(samplename,".junctions.recalibrated.RData", sep = "")))
+load(file.path(outputfolder, "out",  paste(samplename, ".ids_fasta.RData", sep = "")))
+id.pileup <- as.character(z[,1])
+pos.pileup <-  as.numeric(as.character(z[,2]))
+sequence.pileup <- as.character(z[,3])
+unique.ids.pileup <- unique(id.pileup)
+width <- 100
+fasta.file <-  c()
+for (i in 1:length(id.filtered)) {
+	ix.id <- which(id.pileup == id.filtered[i])
+	ix.ref <- which(ids_fasta == id.filtered[i])
+	junction <- junctions.recalibrated[ix.ref]
+	ix.id.pileup <- which(id.pileup == id.filtered[i])
+	ix.junction1 <- which(pos.pileup[ix.id.pileup] == junction)
+	ix.junction2 <- which(pos.pileup[ix.id.pileup] == (junction + 1))
+	seq.vec <- rep("N", width)
+	pos.seq <- seq.int((junction-(width/2-1)), (junction + (width/2)))
+	ix.pos.tmp <- which(pos.seq %in% pos.pileup[ix.id.pileup])
+	seq.vec[ix.pos.tmp] <- sequence.pileup[ix.id.pileup]
+	if ((length(ix.junction1)!=0) & (length(ix.junction2)!=0)) {
+		query.sequence <- character(length = 1)
+		for (ii in 1:length(seq.vec)) {
+			query.sequence <- paste(query.sequence, seq.vec[ii], sep = "")
+		}
+		ids_fasta_query <-  paste(">", id.filtered[i],sep = "")
+		fasta.file <-  c(fasta.file, c(ids_fasta_query, query.sequence))
+	}
+}
+cat(fasta.file, sep = "\n", file = file.path(outputfolder, "out",paste(samplename,".checkselfhomology.fa", sep = "")))
+cat(file.path(outputfolder, "out", paste(samplename,".checkselfhomology.fa", sep = "")), file = file.path(outputfolder, "out", ".link"))
diff --git a/lib/R/CalcBreakpointPositions.R b/lib/R/CalcBreakpointPositions.R
new file mode 100644
index 0000000..cc01280
--- /dev/null
+++ b/lib/R/CalcBreakpointPositions.R
@@ -0,0 +1,348 @@
+## re-calculate breakpoints positions for samples analysed with ericscript < 0.4.0
+## and re-estimation of ericscore and blacklist
+
+vars.tmp <- commandArgs()
+vars <- vars.tmp[length(vars.tmp)]
+split.vars <- unlist(strsplit(vars, ","))
+ericscriptfolder <- as.character(split.vars[1])
+outputfolder <- split.vars [2]
+dbfolder <- split.vars [3]
+refid <- as.character(split.vars[4])
+genomeref <- as.character(split.vars[5])
+
+flag.ada <- require(ada, quietly = T)
+if (flag.ada == F) {
+  require(kernlab, quietly = T)  
+}
+load(file.path(ericscriptfolder, "lib","data", "_resources", "BlackList.RData"))
+load(file.path(ericscriptfolder, "lib","data", "_resources", "DataModel.RData"))
+load(file.path(dbfolder, "data", refid, "EnsemblGene.Structures.RData"))
+
+myls <- list.files(outputfolder, pattern = "Summary.RData")
+myls.tsv.total <- list.files(outputfolder, pattern = ".results.total.tsv")
+myls.tsv.filt <- list.files(outputfolder, pattern = ".results.filtered.tsv")
+
+if (length(myls) == 1 & length(myls.tsv.total) == 1 & length(myls.tsv.filt)) {
+  samplename <- gsub(".results.total.tsv", "", myls.tsv.total)  
+  load(file.path(outputfolder, myls))
+  cat(paste("[EricScript] Re-estimating EricScore for sample ", samplename, "... ", sep = ""))
+  
+  ensgenename1 <- as.character(SummaryMat$EnsemblGene1)
+  ensgenename2 <- as.character(SummaryMat$EnsemblGene2)
+  genename1 <- as.character(SummaryMat$GeneName1)
+  genename2 <- as.character(SummaryMat$GeneName2)
+  gjs.score <- as.numeric(as.character(SummaryMat$GJS))
+  edge.score <- as.numeric(as.character(SummaryMat$ES))
+  nreads.score <- as.numeric(as.character(SummaryMat$US))
+  cov.score <- as.numeric(as.character(SummaryMat$GeneExpr_Fused))
+  myscores <- cbind(gjs.score, edge.score, nreads.score, cov.score)
+  colnames(myscores) <- c("probs.gjs", "probs.es", "probs.us", "cov")
+  myscores <- data.frame(myscores)
+  if (flag.ada) {
+    myada <- ada(control~., data = DataScores,loss="exponential", nu = 0.1)
+    ericscore <- as.numeric(predict(myada, myscores, type = "probs")[,2])
+  } else {
+    sig <- sigest(control~., data = DataScores, frac = 1, na.action = na.omit, scaled = TRUE)[2]
+    model <- ksvm(control~., data = DataScores, type = "C-svc", kernel = "rbfdot", kpar = list(sigma = sig), C = 1, prob.model = TRUE)
+    ericscore <- predict(model, myscores, type = "probabilities")[,2]  
+  }
+  
+  myblacklist <- rep("", length(genename1))
+  ix.bl <- which((genename1 %in% gene.bl1 & genename2 %in% gene.bl2) | (genename1 %in% gene.bl2 & genename2 %in% gene.bl1))
+  if (length(ix.bl) > 0) {
+    for (bli in 1: length(ix.bl)) {
+      ix.bli <- which((gene.bl1 == genename1[ix.bl[bli]] & gene.bl2 == genename2[ix.bl[bli]]) | (gene.bl2 == genename1[ix.bl[bli]] & gene.bl1 == genename2[ix.bl[bli]]))
+      myblacklist[ix.bl[bli]] <- paste("Frequency:", sum(freq.bl[ix.bli]))
+    }
+  }
+  
+  cat("done. \n")
+  cat(paste("[EricScript] Re-calculating breakpoint positions for sample ", samplename, "... ", sep = ""))
+  
+  myseq <- as.character(SummaryMat$JunctionSequence) 
+  left_junction <- substr(myseq, 1, 50)
+  right_junction <- substr(myseq, 51, 100)
+  
+  chr1 <- rep("", length(ensgenename1))
+  chr2 <- rep("", length(ensgenename1))
+  genestart1 <- rep("", length(ensgenename1))
+  genestart2 <- rep("", length(ensgenename1))
+  geneend1 <- rep("", length(ensgenename1))
+  geneend2 <- rep("", length(ensgenename1))
+  strand1 <- rep("", length(ensgenename1))
+  strand2 <- rep("", length(ensgenename1))
+  
+  generef <- as.character(EnsemblGene.Structures$EnsemblGene)
+  chrref <- as.character(EnsemblGene.Structures$Chromosome)
+  genestartref <- as.character(EnsemblGene.Structures$geneStart)
+  geneendref <- as.character(EnsemblGene.Structures$geneEnd)
+  strandref <- as.character(EnsemblGene.Structures$Strand)
+  for (i in 1: length(ensgenename1)) {
+    
+    ix.ref <- which(generef == ensgenename1[i])
+    chr1[i] <- chrref[ix.ref]
+    genestart1[i] <- genestartref[ix.ref]
+    geneend1[i] <- geneendref[ix.ref]
+    strand1[i] <- strandref[ix.ref]
+    
+    ix.ref <- which(generef == ensgenename2[i])
+    chr2[i] <- chrref[ix.ref]
+    genestart2[i] <- genestartref[ix.ref]
+    geneend2[i] <- geneendref[ix.ref]
+    strand2[i] <- strandref[ix.ref]
+  }
+  
+  
+  # NEW Find GenomicPosition (50nt)
+  for (i in 1: length(ensgenename1)) {
+    if (i == 1) {
+      cat(paste("@", i, "_", 1, "\n", left_junction[i], "\n+\n", gsub(", ", "", toString(rep("I", nchar(left_junction[i])))), "\n", "@", i, "_", 2, "\n", right_junction[i],"\n+\n", gsub(", ", "", toString(rep("I", nchar(right_junction[i])))), sep = ""), sep = "\n", file = file.path(outputfolder, "out", "findgenomicpos.fq"), append = F)    
+    } else {
+      cat(paste("@", i, "_", 1, "\n", left_junction[i], "\n+\n", gsub(", ", "", toString(rep("I", nchar(left_junction[i])))), "\n", "@", i, "_", 2, "\n", right_junction[i],"\n+\n", gsub(", ", "", toString(rep("I", nchar(right_junction[i])))), sep = ""), sep = "\n", file = file.path(outputfolder, "out", "findgenomicpos.fq"), append = T)  
+    }
+  }
+  system(paste("bwa aln", "-R 50", genomeref, file.path(outputfolder, "out", "findgenomicpos.fq"), ">", file.path(outputfolder, "out", "findgenomicpos.fq.sai"), "2>>", file.path(outputfolder, "out", ".ericscript.log")))
+  system(paste("bwa samse", "-n 50", genomeref, file.path(outputfolder, "out", "findgenomicpos.fq.sai"), file.path(outputfolder, "out", "findgenomicpos.fq"), ">", file.path(outputfolder, "out", "findgenomicpos.fq.tmp"), "2>>", file.path(outputfolder, "out", ".ericscript.log")))
+  system(paste("cat", file.path(outputfolder, "out", "findgenomicpos.fq.tmp"), "|", file.path(ericscriptfolder, "lib", "perl", "xa2multi.pl"), "-", "|","grep -v -e \'^\\@\' -",">", file.path(outputfolder, "out", "findgenomicpos.fq.sam")))
+  xx.pos <- read.delim(file.path(outputfolder, "out", "findgenomicpos.fq.sam"), sep = "\t", header= F)
+  genpos_1 <- rep(0,length(ensgenename1))
+  genpos_2 <- rep(0,length(ensgenename1))
+  id.pos <- as.character(xx.pos[[1]])
+  flag.pos <- as.character(xx.pos[[2]])
+  chr.pos <- as.character(xx.pos[[3]])
+  if (length(grep("chr", chr.pos)) > 0) {
+    chr.pos <- gsub("chr", "", chr.pos)
+  }
+  pos.pos <- as.numeric(as.character(xx.pos[[4]]))
+  mapq.pos <- as.numeric(as.character(xx.pos[[5]]))
+  
+  for (i in 1: length(ensgenename1)) {
+    
+    ## for 5' gene
+    ix.mypos <- which(id.pos == paste(i, "_1", sep = ""))
+    chr.pos.ix <- chr.pos[ix.mypos]
+    flag.pos.ix <- flag.pos[ix.mypos]
+    pos.pos.ix <- pos.pos[ix.mypos]
+    mapq.pos.ix <- mapq.pos[ix.mypos] 
+    ix.okpos <- which(chr.pos.ix == chr1[i] & pos.pos.ix >= as.numeric(genestart1[i]) & pos.pos.ix <= as.numeric(geneend1[i]))
+    if (length(ix.okpos) > 1) {
+      ix.okpos <- ix.okpos[which.max(mapq.pos.ix[ix.okpos])]
+    }
+    if (length(ix.okpos) > 0) {
+      if (flag.pos.ix[ix.okpos] == 16) {
+        genpos_1[i] <- pos.pos.ix[ix.okpos] 
+      } else {
+        genpos_1[i] <- pos.pos.ix[ix.okpos] + 49
+      }
+    }
+    ## for 3' gene
+    ix.mypos <- which(id.pos == paste(i, "_2", sep = ""))
+    chr.pos.ix <- chr.pos[ix.mypos]
+    flag.pos.ix <- flag.pos[ix.mypos]
+    pos.pos.ix <- pos.pos[ix.mypos]
+    mapq.pos.ix <- mapq.pos[ix.mypos] 
+    ix.okpos <- which(chr.pos.ix == chr2[i] & pos.pos.ix >= as.numeric(genestart2[i]) & pos.pos.ix <= as.numeric(geneend2[i]))
+    if (length(ix.okpos) > 1) {
+      ix.okpos <- ix.okpos[which.max(mapq.pos.ix[ix.okpos])]
+    }
+    if (length(ix.okpos) > 0) {
+      if (flag.pos.ix[ix.okpos] == 16) {
+        genpos_2[i] <- pos.pos.ix[ix.okpos] + 49
+      } else {
+        genpos_2[i] <- pos.pos.ix[ix.okpos] 
+      }
+    }
+  }
+  
+  # NEW Find GenomicPosition (25nt_1)
+  ix.na.pos_1 <- which(genpos_1 == 0)
+  ix.na.pos_2 <- which(genpos_2 == 0)
+  if (length(ix.na.pos_1 ) > 0 | length(ix.na.pos_2 ) > 0) {
+  left_junction.trim <- substr(left_junction, 26, 50)
+  right_junction.trim <- substr(right_junction, 1, 25)
+  for (i in 1: length(ensgenename1)) {
+    if (i == 1) {
+      cat(paste("@", i, "_", 1, "\n", left_junction.trim[i], "\n+\n", gsub(", ", "", toString(rep("I", nchar(left_junction.trim[i])))), "\n", "@", i, "_", 2, "\n", right_junction.trim[i],"\n+\n", gsub(", ", "", toString(rep("I", nchar(right_junction.trim[i])))), sep = ""), sep = "\n", file = file.path(outputfolder, "out", "findgenomicpos.fq"), append = F)    
+    } else {
+      cat(paste("@", i, "_", 1, "\n", left_junction.trim[i], "\n+\n", gsub(", ", "", toString(rep("I", nchar(left_junction.trim[i])))), "\n", "@", i, "_", 2, "\n", right_junction.trim[i],"\n+\n", gsub(", ", "", toString(rep("I", nchar(right_junction.trim[i])))), sep = ""), sep = "\n", file = file.path(outputfolder, "out", "findgenomicpos.fq"), append = T)  
+    }
+  }
+  system(paste("bwa aln", "-R 50", genomeref, file.path(outputfolder, "out", "findgenomicpos.fq"), ">", file.path(outputfolder, "out", "findgenomicpos.fq.sai"), "2>>", file.path(outputfolder, "out", ".ericscript.log")))
+  system(paste("bwa samse", "-n 50", genomeref, file.path(outputfolder, "out", "findgenomicpos.fq.sai"), file.path(outputfolder, "out", "findgenomicpos.fq"), ">", file.path(outputfolder, "out", "findgenomicpos.fq.tmp"), "2>>", file.path(outputfolder, "out", ".ericscript.log")))
+  system(paste("cat", file.path(outputfolder, "out", "findgenomicpos.fq.tmp"), "|", file.path(ericscriptfolder, "lib", "perl", "xa2multi.pl"), "-", "|","grep -v -e \'^\\@\' -",">", file.path(outputfolder, "out", "findgenomicpos.fq.sam")))
+  xx.pos <- read.delim(file.path(outputfolder, "out", "findgenomicpos.fq.sam"), sep = "\t", header= F)
+  id.pos <- as.character(xx.pos[[1]])
+  flag.pos <- as.character(xx.pos[[2]])
+  chr.pos <- as.character(xx.pos[[3]])
+  if (length(grep("chr", chr.pos)) > 0) {
+    chr.pos <- gsub("chr", "", chr.pos)
+  }
+  pos.pos <- as.numeric(as.character(xx.pos[[4]]))
+  mapq.pos <- as.numeric(as.character(xx.pos[[5]]))
+  for (i in 1: length(ensgenename1)) {
+    if (i %in% ix.na.pos_1) {
+      ## for 5' gene
+      ix.mypos <- which(id.pos == paste(i, "_1", sep = ""))
+      chr.pos.ix <- chr.pos[ix.mypos]
+      flag.pos.ix <- flag.pos[ix.mypos]
+      pos.pos.ix <- pos.pos[ix.mypos]
+      mapq.pos.ix <- mapq.pos[ix.mypos] 
+      ix.okpos <- which(chr.pos.ix == chr1[i] & pos.pos.ix >= as.numeric(genestart1[i]) & pos.pos.ix <= as.numeric(geneend1[i]))
+      if (length(ix.okpos) > 1) {
+        ix.okpos <- ix.okpos[which.max(mapq.pos.ix[ix.okpos])]
+      }
+      if (length(ix.okpos) > 0) {
+        if (flag.pos.ix[ix.okpos] == 16) {
+          genpos_1[i] <- pos.pos.ix[ix.okpos] 
+        } else {
+          genpos_1[i] <- pos.pos.ix[ix.okpos] + 24
+        }
+      }
+    }
+    ## for 3' gene
+    if (i %in% ix.na.pos_2) {
+      ix.mypos <- which(id.pos == paste(i, "_2", sep = ""))
+      chr.pos.ix <- chr.pos[ix.mypos]
+      flag.pos.ix <- flag.pos[ix.mypos]
+      pos.pos.ix <- pos.pos[ix.mypos]
+      mapq.pos.ix <- mapq.pos[ix.mypos] 
+      ix.okpos <- which(chr.pos.ix == chr2[i] & pos.pos.ix >= as.numeric(genestart2[i]) & pos.pos.ix <= as.numeric(geneend2[i]))
+      if (length(ix.okpos) > 1) {
+        ix.okpos <- ix.okpos[which.max(mapq.pos.ix[ix.okpos])]
+      }
+      if (length(ix.okpos) > 0) {
+        if (flag.pos.ix[ix.okpos] == 16) {
+          genpos_2[i] <- pos.pos.ix[ix.okpos] + 24
+        } else {
+          genpos_2[i] <- pos.pos.ix[ix.okpos] 
+        }
+      }
+    }
+  }
+  # NEW Find GenomicPosition (25nt_2)
+  ix.na.pos_1 <- which(genpos_1 == 0)
+  ix.na.pos_2 <- which(genpos_2 == 0)
+  left_junction.trim <- substr(left_junction, 1, 25)
+  right_junction.trim <- substr(right_junction, 26, 50)
+  for (i in 1: length(ensgenename1)) {
+    if (i == 1) {
+      cat(paste("@", i, "_", 1, "\n", left_junction.trim[i], "\n+\n", gsub(", ", "", toString(rep("I", nchar(left_junction.trim[i])))), "\n", "@", i, "_", 2, "\n", right_junction.trim[i],"\n+\n", gsub(", ", "", toString(rep("I", nchar(right_junction.trim[i])))), sep = ""), sep = "\n", file = file.path(outputfolder, "out", "findgenomicpos.fq"), append = F)    
+    } else {
+      cat(paste("@", i, "_", 1, "\n", left_junction.trim[i], "\n+\n", gsub(", ", "", toString(rep("I", nchar(left_junction.trim[i])))), "\n", "@", i, "_", 2, "\n", right_junction.trim[i],"\n+\n", gsub(", ", "", toString(rep("I", nchar(right_junction.trim[i])))), sep = ""), sep = "\n", file = file.path(outputfolder, "out", "findgenomicpos.fq"), append = T)  
+    }
+  }
+  system(paste("bwa aln", "-R 50", genomeref, file.path(outputfolder, "out", "findgenomicpos.fq"), ">", file.path(outputfolder, "out", "findgenomicpos.fq.sai"), "2>>", file.path(outputfolder, "out", ".ericscript.log")))
+  system(paste("bwa samse", "-n 50", genomeref, file.path(outputfolder, "out", "findgenomicpos.fq.sai"), file.path(outputfolder, "out", "findgenomicpos.fq"), ">", file.path(outputfolder, "out", "findgenomicpos.fq.tmp"), "2>>", file.path(outputfolder, "out", ".ericscript.log")))
+  system(paste("cat", file.path(outputfolder, "out", "findgenomicpos.fq.tmp"), "|", file.path(ericscriptfolder, "lib", "perl", "xa2multi.pl"), "-", "|","grep -v -e \'^\\@\' -",">", file.path(outputfolder, "out", "findgenomicpos.fq.sam")))
+  
+  
+  xx.pos <- read.delim(file.path(outputfolder, "out", "findgenomicpos.fq.sam"), sep = "\t", header= F)
+  id.pos <- as.character(xx.pos[[1]])
+  flag.pos <- as.character(xx.pos[[2]])
+  chr.pos <- as.character(xx.pos[[3]])
+  if (length(grep("chr", chr.pos)) > 0) {
+    chr.pos <- gsub("chr", "", chr.pos)
+  }
+  pos.pos <- as.numeric(as.character(xx.pos[[4]]))
+  mapq.pos <- as.numeric(as.character(xx.pos[[5]]))
+  for (i in 1: length(ensgenename1)) {
+    if (i %in% ix.na.pos_1) {
+      ## for 5' gene
+      ix.mypos <- which(id.pos == paste(i, "_1", sep = ""))
+      chr.pos.ix <- chr.pos[ix.mypos]
+      flag.pos.ix <- flag.pos[ix.mypos]
+      pos.pos.ix <- pos.pos[ix.mypos]
+      mapq.pos.ix <- mapq.pos[ix.mypos] 
+      ix.okpos <- which(chr.pos.ix == chr1[i] & pos.pos.ix >= as.numeric(genestart1[i]) & pos.pos.ix <= as.numeric(geneend1[i]))
+      if (length(ix.okpos) > 1) {
+        ix.okpos <- ix.okpos[which.max(mapq.pos.ix[ix.okpos])]
+      }
+      if (length(ix.okpos) > 0) {
+        if (flag.pos.ix[ix.okpos] == 16) {
+          genpos_1[i] <- pos.pos.ix[ix.okpos] 
+        } else {
+          genpos_1[i] <- pos.pos.ix[ix.okpos] + 24
+        }
+      }
+    }
+    ## for 3' gene
+    if (i %in% ix.na.pos_2) {
+      ix.mypos <- which(id.pos == paste(i, "_2", sep = ""))
+      chr.pos.ix <- chr.pos[ix.mypos]
+      flag.pos.ix <- flag.pos[ix.mypos]
+      pos.pos.ix <- pos.pos[ix.mypos]
+      mapq.pos.ix <- mapq.pos[ix.mypos] 
+      ix.okpos <- which(chr.pos.ix == chr2[i] & pos.pos.ix >= as.numeric(genestart2[i]) & pos.pos.ix <= as.numeric(geneend2[i]))
+      if (length(ix.okpos) > 1) {
+        ix.okpos <- ix.okpos[which.max(mapq.pos.ix[ix.okpos])]
+      }
+      if (length(ix.okpos) > 0) {
+        if (flag.pos.ix[ix.okpos] == 16) {
+          genpos_2[i] <- pos.pos.ix[ix.okpos] + 24
+        } else {
+          genpos_2[i] <- pos.pos.ix[ix.okpos] 
+        }
+      }
+    }
+  }
+  }
+  
+#   # refine genomic coordinates
+#   genpos_1.recal <- genpos_1
+#   genpos_2.recal <- genpos_2
+#   for (i in 1: length(ensgenename1)) {
+#     ix.ref <- which(generef == ensgenename1[i])
+#     if (strand1[i] == "+") {
+#       exonpos <- as.numeric(unlist(strsplit(as.character(EnsemblGene.Structures$exonEnd[ix.ref]), ",")))
+#     } else {
+#       exonpos <- as.numeric(unlist(strsplit(as.character(EnsemblGene.Structures$exonStart[ix.ref]), ",")))
+#     }
+#     ix.exon <- which.min(abs(genpos_1[i] - exonpos))
+#     mydiff <- abs(genpos_1[i] - exonpos[ix.exon])
+#     if (mydiff <= 3) {
+#       genpos_1.recal[i] <- exonpos[ix.exon]
+#     }
+#     
+#     ix.ref <- which(generef == ensgenename1[i])
+#     if (strand2[i] == "+") {
+#       exonpos <- as.numeric(unlist(strsplit(as.character(EnsemblGene.Structures$exonStart[ix.ref]), ",")))
+#     } else {
+#       exonpos <- as.numeric(unlist(strsplit(as.character(EnsemblGene.Structures$exonEnd[ix.ref]), ",")))
+#     }
+#     ix.exon <- which.min(abs(genpos_2[i] - exonpos))
+#     mydiff <- abs(genpos_2[i] - exonpos[ix.exon])
+#     if (mydiff <= 3) {
+#       genpos_2.recal[i] <- exonpos[ix.exon]
+#     }
+#   }
+#   mynames <- names(SummaryMat)
+#   SummaryMat$Breakpoint1 <- genpos_1.recal
+#   SummaryMat$Breakpoint2 <- genpos_2.recal
+  
+  SummaryMat$Breakpoint1 <- genpos_1
+  SummaryMat$Breakpoint2 <- genpos_2  
+  SummaryMat$EricScore <- ericscore
+  SummaryMat$Blacklist <- myblacklist
+  save(SummaryMat, file=file.path(outputfolder, paste(samplename, ".Summary.recalc.RData", sep = "")))
+  n.spanning <- as.numeric(as.character(SummaryMat$spanningreads))
+  n.crossing <- as.numeric(as.character(SummaryMat$crossingreads))
+  oddity.spanningreads <- rep(0, length(genpos_1))
+  oddity.spanningreads[which(n.spanning == 1 & n.crossing >= 10)] <- 1    
+  
+  if (dim(SummaryMat)[1] > 0) {
+    write.table(SummaryMat, file = file.path(outputfolder,paste(samplename,".results.recalc.total.tsv", sep = "")), sep = "\t", row.names = F, quote = F)
+    ix.sorting.score <- sort(ericscore, decreasing = T, index.return = T)$ix
+    ericscore.sorted <- ericscore[ix.sorting.score]
+    myblacklist.sorted <- myblacklist[ix.sorting.score]
+    oddity.spanningreads.sorted <- oddity.spanningreads[ix.sorting.score]
+    SummaryMat.sorted <- SummaryMat[ix.sorting.score, ]
+    SummaryMat.Filtered <-  SummaryMat.sorted[which(ericscore.sorted > 0.5 & myblacklist.sorted == "" & oddity.spanningreads.sorted == 0), ]
+    write.table(SummaryMat.Filtered[, -16], file = file.path(outputfolder,paste(samplename,".results.recalc.filtered.tsv", sep = "")), sep = "\t", row.names = F, quote = F)
+  }
+  cat("done. \n")
+  cat(paste("[EricScript] Breakpoint position corrected files are in ", outputfolder, ".\n", sep = ""))
+  
+} else {  
+  cat("[EricScript] No files of results found in ", outputfolder, ". Nothing to be done, Exit!\n", sep =" ")
+}
+
diff --git a/lib/R/CalcStats.R b/lib/R/CalcStats.R
new file mode 100644
index 0000000..52138ef
--- /dev/null
+++ b/lib/R/CalcStats.R
@@ -0,0 +1,223 @@
+### calculate statistics
+
+vars.tmp <- commandArgs()
+vars <- vars.tmp[length(vars.tmp)]
+split.vars <- unlist(strsplit(vars, ","))
+resultsfolder <- split.vars[1]
+outputfolder <- split.vars[2]
+datafolder <- split.vars[3]
+algoname <- split.vars[4]
+dataset <- split.vars[5]
+readlength <- as.numeric(split.vars[6])
+normroc <- as.numeric(split.vars[7])
+ericscriptfolder <- as.character(split.vars[8])
+
+source(file.path(ericscriptfolder, "lib", "R", "ImportResults.R"))
+
+trapezint <- function (x, y, a, b)  {
+## function of the ROC package (http://bioconductor.org) 
+    if (length(x) != length(y)) 
+	stop("length x must equal length y")
+    y <- y[x >= a & x <= b]
+    x <- x[x >= a & x <= b]
+    if (length(unique(x)) < 2) 
+	return(NA)
+    ya <- approx(x, y, a, ties = max, rule = 2)$y
+    yb <- approx(x, y, b, ties = max, rule = 2)$y
+    x <- c(a, x, b)
+    y <- c(ya, y, yb)
+    h <- diff(x)
+    lx <- length(x)
+    0.5 * sum(h * (y[-1] + y[-lx]))
+}
+
+algonamelist <- c("ericscript", "chimerascan", "defuse", "fusionmap", "shortfuse")
+algoname <- tolower(algoname)
+
+if (any(algonamelist %in% algoname) == F) {
+	algoid <- "unknown"
+} else {
+	algoid <- algoname
+}
+
+xx <- list.files(resultsfolder, pattern = "sim_")
+nsims <- length(xx)
+cat("[EricScript calcstats] Found ", nsims, " synthetic data analysis for algorithm ", algoname,". \n", sep = "")
+
+if (nsims > 0) {
+	tpr <- rep(NA, nsims)
+	fpr <- rep(NA, nsims)
+	tpr.5 <- rep(NA, nsims)
+	fpr.5 <- rep(NA, nsims)
+	tpr.seq <- rep(NA, nsims)
+	refpath <- file.path(datafolder, dataset, "data")
+	rocs.tpr <- rep(0, 1000)
+	rocs.fpr <- rep(0, 1000)
+	nosims <- 0
+	
+	refpath <- file.path(datafolder, dataset, "data")
+	
+	for (i in 1: nsims) {
+		if (i < 10) {
+			dataresults <- get(paste("Import", algoid, sep = "_"))(file.path(resultsfolder, paste("sim_", "0000", i, sep = "")))
+			if (is.list(dataresults)) {
+				load(file.path(refpath, paste("sim_", "0000", i, sep = ""), "GeneFusions.RData"))
+				cat ("[EricScript calcstats] Analysing ", paste("sim_", "0000", i, sep = "")," ... ")
+			} else if (dataresults == 0) {
+				cat("[EricScript calcstats] No results file found for ", paste("sim_", "0000", i, sep = ""), ".\n", sep = "")
+				nosims <- nosims + 1
+				next
+			} else if (dataresults > 1) {
+				cat("[EricScript calcstats] Error: ", dataresults, " results file found for ", paste("sim_", "0000", i, sep = ""), ". Only 1 file of results is required. \n", sep = "")
+				nosims <- nosims + 1
+				next
+			}
+		} else if (i >= 10 & i < 100) {
+			dataresults <- get(paste("Import", algoid, sep = "_"))(file.path(resultsfolder, paste("sim_", "000", i, sep = "")))
+			if (is.list(dataresults)) {
+				load(file.path(refpath, paste("sim_", "000", i, sep = ""), "GeneFusions.RData"))
+				cat ("[EricScript calcstats] Analysing ", paste("sim_", "000", i, sep = "")," ... ")
+			} else if (dataresults == 0) {
+				cat("[EricScript calcstats] No results file found for ", paste("sim_", "000", i, sep = ""), ".\n", sep = "")
+				nosims <- nosims + 1
+				next
+			} else if (dataresults > 1) {
+				cat("[EricScript calcstats] Error: ", dataresults, " results file found for ", paste("sim_", "000", i, sep = ""), ". Only 1 file of results is required. \n", sep = "")
+				nosims <- nosims + 1
+				next
+			}
+		} else if (i >= 100 & i < 1000) {
+			dataresults <- get(paste("Import", algoid, sep = "_"))(file.path(resultsfolder, paste("sim_", "00", i, sep = "")))
+			if (is.list(dataresults)) {
+				load(file.path(refpath, paste("sim_", "00", i, sep = ""), "GeneFusions.RData"))
+				cat ("[EricScript calcstats] Analysing ", paste("sim_", "00", i, sep = "")," ... ")
+			} else if (dataresults == 0) {
+				cat("[EricScript calcstats] No results file found for ", paste("sim_", "00", i, sep = ""), ".\n", sep = "")
+				nosims <- nosims + 1
+				next
+			} else if (dataresults > 1) {
+				cat("[EricScript calcstats] Error: ", dataresults, " results file found for ", paste("sim_", "00", i, sep = ""), ". Only 1 file of results is required. \n", sep = "")
+				nosims <- nosims + 1
+				next
+			}
+		} else if (i >= 1000 & i < 10000) {
+			dataresults <- get(paste("Import", algoid, sep = "_"))(file.path(resultsfolder, paste("sim_", "0", i, sep = "")))
+			if (is.list(dataresults)) {
+				load(file.path(refpath, paste("sim_", "0", i, sep = ""), "GeneFusions.RData"))
+				cat ("[EricScript calcstats] Analysing ", paste("sim_", "0000", i, sep = "")," ... ")
+			} else if (dataresults == 0) {
+				cat("[EricScript calcstats] No results file found for ", paste("sim_", "0", i, sep = ""), ".\n", sep = "")
+				nosims <- nosims + 1
+				next
+			} else if (dataresults > 1) {
+				cat("[EricScript calcstats] Error: ", dataresults, " results file found for ", paste("sim_", "0", i, sep = ""), ". Only 1 file of results is required. \n", sep = "")
+				nosims <- nosims + 1
+				next
+			}
+		} else if (i >= 10000 & i < 100000) {
+			dataresults <- get(paste("Import", algoid, sep = "_"))(file.path(resultsfolder, paste("sim_", i, sep = "")))
+			if (is.list(dataresults)) {
+				load(file.path(refpath, paste("sim_", i, sep = ""), "GeneFusions.RData"))
+				cat ("[EricScript calcstats] Analysing ", paste("sim_",  i, sep = "")," ... ")
+			} else if (dataresults == 0) {
+				cat("[EricScript calcstats] No results file found for ", paste("sim_", i, sep = ""), ".\n", sep = "")
+				nosims <- nosims + 1
+				next
+			} else if (dataresults > 1) {
+				cat("[EricScript calcstats] Error: ", dataresults, " results file found for ", paste("sim_", i, sep = ""), ". Only 1 file of results is required. \n", sep = "")
+				nosims <- nosims + 1
+				next
+			}
+		}
+		
+		id1.simul <- GeneFusions[[1]]
+		id2.simul <- GeneFusions[[2]]
+		seq.simul <- GeneFusions[[5]]
+		ngenefusions <- length(id1.simul)
+		if (!exists("cov.tpr")) {
+			cov.tpr <- rep(0, ngenefusions)
+		}
+		gene1ens <- as.character(dataresults$gene5)
+		gene2ens <- as.character(dataresults$gene3)
+		nreads <- as.numeric(as.character(dataresults$nreads))
+		score <- as.numeric(as.character(dataresults$score))
+		if (normroc > 1) {
+			score <- score/normroc
+		}
+		seq <- as.character(dataresults$seq)
+		
+		tpr[i] <- length(which(gene1ens %in% id1.simul & gene2ens %in% id2.simul))/ngenefusions
+		ix.tpr <- which(id1.simul %in% gene1ens & id2.simul %in%gene2ens)
+		ix.fpr <- which((gene1ens %in% id1.simul & gene2ens %in% id2.simul) == F)
+		cov.tpr[ix.tpr] <- cov.tpr[ix.tpr] + 1
+		
+		tpr.5[i] <- length(which(gene1ens %in% id1.simul & gene2ens %in% id2.simul & nreads > 5))/ngenefusions
+		fpr.5[i] <- length(which((gene1ens %in% id1.simul & gene2ens %in% id2.simul) == F & nreads > 5))/length(gene1ens)
+		
+		fpr[i] <- (length(gene1ens) - tpr[i]*ngenefusions)/length(gene1ens)
+		
+		
+		rocs.tpr <- colSums(rbind(rocs.tpr, tabulate(score[which(gene1ens %in% id1.simul & gene2ens %in% id2.simul)]*1000, nbins = 1000)))
+		rocs.fpr <- colSums(rbind(rocs.fpr, tabulate(score[which((gene1ens %in% id1.simul & gene2ens %in% id2.simul) == F)]*1000, nbins = 1000)))
+		
+		ix.correctseq <- c()
+		for (ii in 1: length(ix.tpr)) {
+			ix.tmp <- which(gene1ens == id1.simul[ix.tpr[ii]] & gene2ens == id2.simul[ix.tpr[ii]])
+			if (length(ix.tmp) > 1) { 
+				ix.tmp <- ix.tmp[1]
+			}
+			if (length(agrep(toupper(seq[ix.tmp]), seq.simul[ix.tpr], max = 5)) > 0 & is.na(seq[ix.tmp]) == F) {
+				ix.correctseq <- c(ix.correctseq, ix.tpr[ii])
+			}
+		}
+		tpr.seq[i] <- length(ix.correctseq)/length(ix.tpr)
+		
+		
+		cat ("done.\n")
+		
+	}
+	
+	
+	roc.total <- rocs.tpr + rocs.fpr
+	ntot <- sum(roc.total)
+	ntot.tpr <- sum(rocs.tpr)
+	ntot.fpr <- sum(rocs.fpr)
+	sens <- rep(0, 1000)
+	spec <- rep(0, 1000)
+	for (i in 1: 1000) {
+		if (i == 1) {
+			sens[i] <- sum(rocs.tpr)/ntot.tpr
+			spec[i] <- 1 - sum(rocs.fpr)/ntot.fpr 
+		} else {
+			sens[i] <- sum(rocs.tpr[-c(1:i)])/ntot.tpr
+			spec[i] <- 1 - sum(rocs.fpr[-c(1:i)])/ntot.fpr
+		}
+	}
+	
+	stats <- list()
+	stats$algorithm <- algoname
+	stats$dataset <- dataset
+	stats$readlength <- readlength
+	stats$totalsims <- nsims
+	stats$nsims <- nsims - nosims
+	stats$meantpr <- mean(tpr, na.rm = T)
+	stats$meanfpr <- mean(fpr, na.rm = T)
+	stats$meantpsr <- mean(tpr.seq, na.rm = T)
+	stats$auc <- trapezint(sens, 1 - spec, 0, 1)
+	stats$meantpr5 <- mean(tpr.5, na.rm = T)
+	stats$meanfpr5 <- mean(fpr.5, na.rm = T)
+	stats$tpr <- tpr
+	stats$fpr <- fpr
+	stats$tpsr <- tpr.seq
+	stats$tpr5 <- tpr.5
+	stats$fpr5 <- fpr.5
+	stats$scoring_sensitivity <- sens
+	stats$scoring_specificity <- spec
+	stats$covtpr <- cov.tpr/(nsims-nosims)
+	
+	save(stats, file = file.path(outputfolder, paste(algoname, dataset, readlength, "stats","RData", sep = ".")))
+} else {
+	cat ("[EricScript calcstats] Error: no directories containing results on synthetic data have been found in ", resultsfolder, ". Exit.\n", sep = "")
+	
+}
+
diff --git a/lib/R/CheckDB.R b/lib/R/CheckDB.R
new file mode 100644
index 0000000..faca01b
--- /dev/null
+++ b/lib/R/CheckDB.R
@@ -0,0 +1,73 @@
+vars.tmp <- commandArgs()
+vars <- vars.tmp[length(vars.tmp)]
+split.vars <- unlist(strsplit(vars, ","))
+ericscriptfolder <- split.vars[1]
+refid <- split.vars[2]
+dbfolder <- as.character(split.vars[3])
+
+flag.dbexists <- 1
+
+mydbdata.homo <- c("EnsemblGene.Reference.fa", "EnsemblGene.Sequences.RData", "EnsemblGene.GenePosition.RData", "EnsemblGene.Structures.RData", "EnsemblGene.GeneInfo.RData", "EnsemblGene.Paralogs.RData", "EnsemblGene.GeneNames.RData")
+mydbdata <- c("EnsemblGene.Reference.fa", "EnsemblGene.Sequences.RData", "EnsemblGene.GenePosition.RData", "EnsemblGene.Structures.RData", "EnsemblGene.GeneInfo.RData","EnsemblGene.GeneNames.RData")
+
+xx <- file.exists(file.path(dbfolder, "data", refid))
+  if (xx) {
+    xx.files <- list.files(file.path(dbfolder, "data", refid))
+    if (refid == "homo_sapiens") {
+      xx1 <- all( mydbdata.homo %in% xx.files )
+    } else {
+      xx1 <- all( mydbdata %in% xx.files )
+    }
+    if (!xx1) {
+      flag.dbexists <- 0
+      cat("[EricScript] Some required db files were not found for", refid, "genome. Please run \"ericscript.pl --downdb --refid", refid, "\" to solve this.\n")
+    }
+  } else {
+    flag.dbexists <- 0
+    cat("[EricScript] DB data for", refid, "genome do not exist. Set correct -db option or run \" ericscript.pl --downdb --refid", refid, "\" to solve this.\n")
+  }
+
+## check bwa version
+yy <- file.exists(file.path(ericscriptfolder, "lib", "data", "_resources", ".bwa.version"))
+if (yy) {
+  prev.version.bwa <- scan(file.path(ericscriptfolder, "lib", "data", "_resources", ".bwa.version"), what = "", quiet = T, sep = "\n")
+  system(paste("bwa", "2>&1", "|", "grep ersion", ">", file.path(ericscriptfolder, "lib", "data", "_resources", ".bwa.version.tmp")))
+  curr.version.bwa <- scan(file.path(ericscriptfolder, "lib", "data", "_resources", ".bwa.version.tmp"), what = "", quiet = T, sep = "\n")
+  if (curr.version.bwa != prev.version.bwa) {
+    cat("[EricScript] Updating BWA indexes for", refid,"... ")    
+    system(paste("bwa index", file.path(file.path(dbfolder, "data", refid, "EnsemblGene.Reference.fa")), "1>>", file.path(ericscriptfolder, "lib", "data", "_resources", ".bwa.version.tmp"), "2>>", file.path(ericscriptfolder, "lib", "data", "_resources", ".bwa.version.tmp")))    
+    cat("done.\n")
+    cat(curr.version.bwa, file = file.path(ericscriptfolder, "lib", "data", "_resources", ".bwa.version"))
+  }
+} else {
+  system(paste("bwa", "2>&1", "|", "grep ersion", ">", file.path(ericscriptfolder, "lib", "data", "_resources", ".bwa.version.tmp")))
+  curr.version.bwa <- scan(file.path(ericscriptfolder, "lib", "data", "_resources", ".bwa.version.tmp"), what = "", quiet = T, sep = "\n")
+  version.a <- gsub("Version: ", "", strsplit(curr.version.bwa, ".", fixed = T)[[1]][1])
+  version.b <- strsplit(curr.version.bwa, ".", fixed = T)[[1]][2]
+  version.c <- gsub("[a-z]", "", strsplit(strsplit(curr.version.bwa, ".", fixed = T)[[1]][3], "-")[[1]][1])
+  version.tot <- as.numeric(paste(version.a, version.b, version.c, sep = ""))
+  if (version.tot >= 74) {
+    cat("[EricScript] Updating BWA indexes for", refid, "...")    
+    system(paste("bwa index", file.path(file.path(dbfolder, "data", refid, "EnsemblGene.Reference.fa")), "1>>", file.path(ericscriptfolder, "lib", "data", "_resources", ".bwa.version.tmp"), "2>>", file.path(ericscriptfolder, "lib", "data", "_resources", ".bwa.version.tmp")))    
+    cat("done.\n")
+    system(paste("bwa", "2>&1", "|", "grep ersion", ">", file.path(ericscriptfolder, "lib", "data", "_resources", ".bwa.version")))
+   } else {
+    flag.dbexists <- 0
+    cat("[EricScript] BWA version >= 0.7.4 is required. Exit.\n") 
+  }
+}
+
+mydbdata.bwa <- c("EnsemblGene.Reference.fa.bwt", "EnsemblGene.Reference.fa.pac", "EnsemblGene.Reference.fa.ann", "EnsemblGene.Reference.fa.amb", "EnsemblGene.Reference.fa.sa")
+
+if (xx) {
+  xx.files.bwa <- list.files(file.path(dbfolder, "data", refid))
+  xx1 <- all( mydbdata.bwa %in% xx.files.bwa )
+  if (!xx1) {
+    flag.dbexists <- 0
+    cat("[EricScript] Some required files (bwa indexes) were not found for", refid, "genome. Please run \"ericscript.pl --downdb --refid", refid, "\" to solve this.\n")
+  }  
+}
+
+cat(flag.dbexists, file = file.path(ericscriptfolder, "lib", "data", "_resources", ".flag.dbexists"))
+
+
diff --git a/lib/R/CheckSelfHomology.R b/lib/R/CheckSelfHomology.R
new file mode 100644
index 0000000..f225774
--- /dev/null
+++ b/lib/R/CheckSelfHomology.R
@@ -0,0 +1,110 @@
+vars.tmp <- commandArgs()
+vars <- vars.tmp[length(vars.tmp)]
+split.vars <- unlist(strsplit(vars, ","))
+samplename <- split.vars [1]
+outputfolder <- split.vars[2]
+x <- read.delim(file.path(outputfolder,"out",paste(samplename, ".checkselfhomology.blat", sep = "")), sep = "\t", header = F)
+query.fa <- readLines(file.path(outputfolder,"out",paste(samplename, ".checkselfhomology.fa", sep = "")))
+id.fa <- substr(query.fa[seq(1, length(query.fa), by = 2)], 2, nchar(query.fa[seq(1, length(query.fa), by = 2)]))
+id.query.nolabel <- as.character(x[,1])
+unique.ids.nolabel <- unique(id.query.nolabel)
+ix.fa <- which(id.fa %in% unique.ids.nolabel)
+seq.fa <- query.fa[seq(2, length(query.fa), by = 2)][ix.fa]
+id.target.f <-  as.character(x[,2])
+id.match.f <- as.numeric(as.character(x[,4]))
+start.match <- as.numeric(as.character(x[,7]))
+end.match <-  as.numeric(as.character(x[,8]))
+diff.match <- end.match - start.match + 1
+ix.junct.nohomology <- rep(0, length(unique.ids.nolabel))
+ix.junct.homology <- rep(0, length(unique.ids.nolabel))
+flag.dup.a <- rep(0, length(unique.ids.nolabel))
+flag.dup.b <- rep(0, length(unique.ids.nolabel))
+homology.list <- vector("list", length(unique.ids.nolabel))
+for (i in 1:length(unique.ids.nolabel)) {
+	ix.id <- which(id.query.nolabel == unique.ids.nolabel[i])
+	query <- unique.ids.nolabel[i]
+	target <- id.target.f[ix.id]
+	match <- as.numeric(id.match.f[ix.id])
+	query.tmp <- unlist(strsplit(query, "----", fixed = T))
+	query_a <- unlist(strsplit(query.tmp[1], "_"))[1]
+	query_b <- unlist(strsplit(query.tmp[2], "_"))[1]
+	diff.match.tmp <- diff.match[ix.id]
+	start.match.tmp <- start.match[ix.id]
+	end.match.tmp <- end.match[ix.id]
+	width <- 100 - length(grep("N", unlist(strsplit(seq.fa[i], ""))))
+	ix.c <- seq.int(1,length(target))
+	ix.a <- which(target %in% query_a)
+	ix.b <- which(target %in% query_b)
+	ix.ab <- c(ix.a, ix.b)
+	if (((length(ix.a) > 0) & (length(ix.b) > 0)) | (length(ix.a) > 1 & length(ix.b) == 1) | (length(ix.a) == 1 & length(ix.b) > 1)) {
+		if ((length(ix.a) > 1 & length(ix.b) == 1)) {
+			myflag <- length(which(start.match.tmp[ix.a] %in% c((start.match.tmp[ix.b]-3):(start.match.tmp[ix.b]+3)))) + length(which(end.match.tmp[ix.a] %in% c((end.match.tmp[ix.b]-3):(end.match.tmp[ix.b]+3))))
+		} else if ((length(ix.a) == 1 & length(ix.b) > 1)) {
+			myflag <- length(which(start.match.tmp[ix.b] %in% c((start.match.tmp[ix.a]-3):(start.match.tmp[ix.a]+3)))) + length(which(end.match.tmp[ix.b] %in% c((end.match.tmp[ix.a]-3):(end.match.tmp[ix.a]+3))))
+		} else {
+			myflag <- 0
+		}
+		if (myflag == 0) {
+			if (max(diff.match.tmp[ix.ab]) < round(0.8*width) & ((length(ix.ab) > 2) & any(diff.match.tmp[ix.ab] < 30) | (length(ix.ab) == 2))) {
+				if (length(ix.ab) != 0) {
+					ix.c <- ix.c[-ix.ab]
+				}
+				
+				if(length(ix.c) != 0) {
+					unique.ids.homology <- unique(target[ix.c])
+					homology.list[[i]] <- vector("list", length(unique.ids.homology))
+					for (j in 1:length(unique.ids.homology)) {
+						ix.id.homology <- which(target[ix.c] == unique.ids.homology[j])
+						max.match <- max(match[ix.c][ix.id.homology])
+						homology.list[[i]][[j]] <- cbind(unique.ids.homology[j], max.match)
+					}
+					ix.junct.homology[i] <- 1
+				}
+				if(length(ix.c) == 0) {
+					ix.junct.nohomology[i] <- 1
+					
+				}
+				if (length(ix.a) > 1) {
+					flag.dup.a[i] <- 1
+				}
+				if (length(ix.b) > 1) {
+					flag.dup.b[i] <- 1
+				}
+				
+			}
+			
+		}
+	}
+}
+ix.junct <- sort(c(which(ix.junct.nohomology == 1), which(ix.junct.homology == 1)))
+if (length(ix.junct) == 0) {
+	myflag <- 0
+	cat(myflag, file = file.path(outputfolder, "out", ".ericscript.flag"))
+	stop("No putative gene fusions pass the self-homology filter. Exit!")		
+}
+
+info.homology <- rep("", length(ix.junct))
+for (i in 1: length(ix.junct)) {
+	list.tmp <- homology.list[[ix.junct[i]]]
+	if (is.null(list.tmp) == F) {
+		info.homology.tmp <- c()
+		n.homo <- length(list.tmp)
+		if (n.homo > 30) {
+			n.homo <- 30
+			info.homology.tmp <- "More than 30 homologies found: "
+		}
+		for (j in 1: n.homo) {
+			info.homology.tmp <- paste(info.homology.tmp, paste(list.tmp[[j]][1,1]," (",list.tmp[[j]][1,2],"%)", sep = "" ), sep = "")
+			if (n.homo > 1 & j < n.homo) {
+				info.homology.tmp <- paste(info.homology.tmp, ", ", sep = "")
+			} else 
+			{
+				info.homology[i] <- info.homology.tmp
+			}
+		}
+		
+	}
+}
+
+info.id.and.homology <- cbind(unique.ids.nolabel[ix.junct], info.homology, flag.dup.a[ix.junct], flag.dup.b[ix.junct])
+save(info.id.and.homology, file = file.path(outputfolder,"out",paste(samplename,".ids_homology.RData", sep  = "")))
diff --git a/lib/R/ConvertTxt2R.R b/lib/R/ConvertTxt2R.R
new file mode 100644
index 0000000..c71b617
--- /dev/null
+++ b/lib/R/ConvertTxt2R.R
@@ -0,0 +1,78 @@
+vars.tmp <- commandArgs()
+vars <- vars.tmp[length(vars.tmp)]
+split.vars <- unlist(strsplit(vars, ","))
+ericscriptfolder <- split.vars [1]
+refid <- split.vars[2]
+dbfolder <- split.vars[3]
+tmpfolder <- split.vars[4]
+
+xx <- read.delim(file.path(tmpfolder, "genepos.txt"), sep = "\t", header = F)
+chrs <- as.character(xx[[2]])
+unique.chrs <- unique(chrs)
+geneid <- as.character(xx[[1]])
+genepos <- as.numeric(as.character(xx[[3]]))
+xx.strand <- read.delim(file.path(tmpfolder, "strand.txt"), sep = "\t", header = F)
+strand <- as.character(xx.strand[[2]])
+## sorting genes by genomic pos
+ix.srt <- rep(NA, dim(xx)[1])
+count <- 0
+for ( i in 1: length(unique.chrs)) {
+  ix.chr <- which(chrs == unique.chrs[i])
+  tmp <- sort(genepos[ix.chr], index.return = T)
+  ix.srt[(count + 1):(count + length(ix.chr))] <- ix.chr[tmp$ix]
+  count <- count + length(ix.chr)
+}
+
+geneid.srt <- geneid[ix.srt]
+genepos.str <- genepos[ix.srt]
+strand.srt <- strand[ix.srt]
+
+EnsemblGene.GenePosition <- xx[ix.srt, ]
+names(EnsemblGene.GenePosition) <- c("EnsemblGene", "Chromosome", "Position")
+save(EnsemblGene.GenePosition, file = file.path(dbfolder, "data", refid, "EnsemblGene.GenePosition.RData"))
+
+xx <- read.delim(file.path(tmpfolder, "exonstartend.mrg.txt"), sep = "\t", header = F)
+
+exgeneid <- as.character(xx[[4]])
+exchr <- as.character(xx[[1]])
+exstart.tmp <- as.numeric(as.character(xx[[2]])) + 1
+exend.tmp <- as.character(xx[[3]])
+ix.dup <- which(!duplicated(exgeneid))
+exstart <- rep("", length(ix.dup))
+exend <- rep("", length(ix.dup))
+excount <- rep(NA, length(ix.dup))
+start <- rep("", length(ix.dup))
+end <- rep("", length(ix.dup))
+strand1 <- rep("", length(ix.dup))
+for (i in 1: (length(ix.dup) - 1)) {
+  if (strand[i] == "-1") {
+    strand1[i] <- "-"
+  } else {
+    strand1[i] <- "+"    
+  }
+  ix.tmp <- ix.dup[i]:(ix.dup[i+1] - 1)
+  start[i] <- exstart.tmp[ix.tmp][1]
+  end[i] <- exend.tmp[ix.tmp][length(ix.tmp)]
+  exstart[i] <- toString(exstart.tmp[ix.tmp])
+  exend[i] <- toString(exend.tmp[ix.tmp])
+  excount[i] <- length(ix.tmp)
+}
+
+
+
+EnsemblGene.Structures <- cbind(exgeneid[ix.dup], exchr[ix.dup], strand1, start, end, excount, exstart, exend)[ix.srt, ]
+EnsemblGene.Structures <- data.frame(EnsemblGene.Structures)
+names(EnsemblGene.Structures) <- c("EnsemblGene", "Chromosome", "Strand", "geneStart", "geneEnd", "exonCount", "exonStart", "exonEnd")
+save(EnsemblGene.Structures, file = file.path(dbfolder, "data", refid, "EnsemblGene.Structures.RData"))
+
+xx <- read.delim(file.path(tmpfolder, "geneinfo.txt"), sep = "\t", header = F)
+EnsemblGene.GeneInfo <- xx[ix.srt, ]
+names(EnsemblGene.GeneInfo) <- c("EnsemblGene", "GeneName", "Description")
+save(EnsemblGene.GeneInfo, file = file.path(dbfolder, "data", refid, "EnsemblGene.GeneInfo.RData"))
+
+if (refid == "homo_sapiens") {
+xx <- read.delim(file.path(tmpfolder, "paralogs.txt"), sep = "\t", header = F)
+EnsemblGene.Paralogs <- xx
+names(EnsemblGene.Paralogs) <- c("EnsemblGene", "Paralogs")
+save(EnsemblGene.Paralogs, file = file.path(dbfolder, "data", refid, "EnsemblGene.Paralogs.RData"))
+}
diff --git a/lib/R/CreateDataEricTheSimulator.R b/lib/R/CreateDataEricTheSimulator.R
new file mode 100644
index 0000000..f2461ef
--- /dev/null
+++ b/lib/R/CreateDataEricTheSimulator.R
@@ -0,0 +1,53 @@
+options(stringsAsFactors=F)
+vars.tmp <- commandArgs()
+vars <- vars.tmp[length(vars.tmp)]
+split.vars <- unlist(strsplit(vars, ","))
+refid <- split.vars[1]
+dbfolder <- split.vars[2]
+tmpfolder <- split.vars[3]
+
+## read transcript genomic info
+xx <-  read.delim(file.path(tmpfolder, "transcripts.txt"), sep = "\t", header = F)
+geneid <- xx[[1]]
+transcriptid <- xx[[2]]
+exonstart <- xx[[3]]
+exonend <- xx[[4]]
+chr <- xx[[5]]
+strandtmp <- xx[[6]]
+strand <- rep("+", length(strandtmp))
+strand[strandtmp == "-1"] <- "-"
+## read transcript cdna
+xxseq <- scan(file.path(tmpfolder, "transcripts.fa"), what = list(seq="", id=""), sep = "\t", quiet = T)
+seqtmp <- xxseq[[1]]
+transcriptid.seqtmp <- xxseq[[2]]
+rm (xx, xxseq)
+unique.transcriptid <- unique(transcriptid)
+EnsemblGene.Structures <- c()
+GeneNames <- rep("", length(unique.transcriptid))
+sequences <- rep("", length(unique.transcriptid))
+for (i in 1: length(unique.transcriptid)) {
+  ix <- which(transcriptid == unique.transcriptid[i])
+  ixseq <- which(transcriptid.seqtmp == unique.transcriptid[i])
+  if (length(ixseq) > 0) {
+    sequences[i] <-  seqtmp[ixseq]    
+  } else {
+    sequences[i] <-  "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"
+  }
+  ixsrt <- sort(exonstart[ix], index.return = T)$ix
+  genestart <- min(c(exonstart[ix], exonend[ix]))
+  geneend <- max(c(exonstart[ix], exonend[ix]))
+  exonStart <- toString(exonstart[ix[ixsrt]])
+  exonEnd <- toString(exonend[ix[ixsrt]])
+  exoncount <- length(ix)
+  mychr <- unique(chr[ix])
+  mystrand <- unique(strand[ix])
+  GeneNames[i] <- unique(geneid[ix])
+  EnsemblGene.Structures <- rbind(EnsemblGene.Structures, c(unique.transcriptid[i], mychr, mystrand, genestart, geneend, exoncount, exonStart, exonEnd))
+}
+colnames(EnsemblGene.Structures) <- c("EnsemblGene", "Chromosome", "Strand", "geneStart", "geneEnd", "exonCount", "exonStart", "exonEnd")
+EnsemblGene.Structures <- data.frame(EnsemblGene.Structures)
+save(EnsemblGene.Structures, GeneNames, sequences, file = file.path(dbfolder, "data", refid, "EnsemblGene.Transcripts.RData"))
+
+
+
+
diff --git a/lib/R/DownloadDB.R b/lib/R/DownloadDB.R
new file mode 100644
index 0000000..48e3757
--- /dev/null
+++ b/lib/R/DownloadDB.R
@@ -0,0 +1,207 @@
+vars.tmp <- commandArgs()
+vars <- vars.tmp[length(vars.tmp)]
+split.vars <- unlist(strsplit(vars, ","))
+ericscriptfolder <- split.vars [1]
+dbfolder <- split.vars [2]
+user.refid <- split.vars [3]
+tmpfolder <- split.vars [4]
+ensversion <- as.numeric(split.vars [5])
+####
+
+load(file.path(dbfolder, "data", "_resources", "RefID.RData"))
+ix.refid <- which(refid == user.refid)
+if (length(ix.refid) == 0) {
+  cat("\n[EricScript] Error: No data available for genome ", user.refid, ". Run ericscript.pl --printdb to view the available genomes.\n", sep = "")
+  cat(0, file = file.path(tmpfolder, ".refid.flag"))
+  quit( save = "no")
+}
+cat(1, file = file.path(tmpfolder, ".refid.flag"))
+myrefid <- refid[ix.refid]
+myrefid.path <- refid.path[ix.refid]
+
+if (ensversion == 0) {
+  ensversion <- version
+}
+## create xml queries
+
+tmp0 <- unlist(strsplit(myrefid, "_"))
+myrefid.xml <- paste(gsub(", ", "", toString(c(substr(tmp0[1], 1, 1), tmp0[2]))), "gene", "ensembl", sep = "_")
+
+## genepos file
+fileout <- file.path(tmpfolder, "genepos.xml")
+cat("<?xml version=\"1.0\" encoding=\"UTF-8\"?>", file = fileout, sep = "\n")
+cat("<Query  virtualSchemaName = \"default\" formatter = \"TSV\" header = \"0\" uniqueRows = \"0\" count = \"\" datasetConfigVersion = \"0.6\" >", file = fileout, sep = "\n", append = T)
+cat("", file = fileout, sep = "\n", append = T)
+cat(paste("<Dataset name = \"", myrefid.xml,"\" interface = \"default\" >", sep = ""), file = fileout, sep = "\n", append = T)
+#cat("<Filter name = \"status\" value = \"KNOWN\"/>", file = fileout, sep = "\n", append = T)
+#cat("<Filter name = \"transcript_status\" value = \"KNOWN\"/>", file = fileout, sep = "\n", append = T)
+if (myrefid == "homo_sapiens") {
+  cat("<Filter name = \"biotype\" value = \"processed_transcript,protein_coding\"/>", file = fileout, sep = "\n", append = T)
+  cat("<Filter name = \"with_hgnc\" excluded = \"0\"/>", file = fileout, sep = "\n", append = T)
+} else {
+  cat("<Filter name = \"biotype\" value = \"protein_coding\"/>", file = fileout, sep = "\n", append = T)
+}
+#### start attributes
+cat("<Attribute name = \"ensembl_gene_id\" />", file = fileout, sep = "\n", append = T)
+cat("<Attribute name = \"chromosome_name\" />", file = fileout, sep = "\n", append = T)
+cat("<Attribute name = \"start_position\" />", file = fileout, sep = "\n", append = T)
+#### end attributes
+cat("</Dataset>", file = fileout, sep = "\n", append = T)
+cat("</Query>", file = fileout, sep = "\n", append = T)
+
+## geneinfo file
+fileout <- file.path(tmpfolder, "geneinfo.xml")
+cat("<?xml version=\"1.0\" encoding=\"UTF-8\"?>", file = fileout, sep = "\n")
+cat("<Query  virtualSchemaName = \"default\" formatter = \"TSV\" header = \"0\" uniqueRows = \"0\" count = \"\" datasetConfigVersion = \"0.6\" >", file = fileout, sep = "\n", append = T)
+cat("", file = fileout, sep = "\n", append = T)
+cat(paste("<Dataset name = \"", myrefid.xml,"\" interface = \"default\" >", sep = ""), file = fileout, sep = "\n", append = T)
+#cat("<Filter name = \"status\" value = \"KNOWN\"/>", file = fileout, sep = "\n", append = T)
+#cat("<Filter name = \"transcript_status\" value = \"KNOWN\"/>", file = fileout, sep = "\n", append = T)
+if (myrefid == "homo_sapiens") {
+  cat("<Filter name = \"biotype\" value = \"processed_transcript,protein_coding\"/>", file = fileout, sep = "\n", append = T)
+  cat("<Filter name = \"with_hgnc\" excluded = \"0\"/>", file = fileout, sep = "\n", append = T)
+} else {
+  cat("<Filter name = \"biotype\" value = \"protein_coding\"/>", file = fileout, sep = "\n", append = T)
+} 
+#### start attributes
+cat("<Attribute name = \"ensembl_gene_id\" />", file = fileout, sep = "\n", append = T)
+if (ensversion > 0 & ensversion <= 75) {
+  cat("<Attribute name = \"external_gene_id\" />", file = fileout, sep = "\n", append = T)  
+} else {
+  cat("<Attribute name = \"external_gene_name\" />", file = fileout, sep = "\n", append = T)
+}
+cat("<Attribute name = \"description\" />", file = fileout, sep = "\n", append = T)
+#### end attributes
+cat("</Dataset>", file = fileout, sep = "\n", append = T)
+cat("</Query>", file = fileout, sep = "\n", append = T)
+
+## exonstartend file
+fileout <- file.path(tmpfolder, "exonstartend.xml")
+cat("<?xml version=\"1.0\" encoding=\"UTF-8\"?>", file = fileout, sep = "\n")
+cat("<Query  virtualSchemaName = \"default\" formatter = \"TSV\" header = \"0\" uniqueRows = \"0\" count = \"\" datasetConfigVersion = \"0.6\" >", file = fileout, sep = "\n", append = T)
+cat("", file = fileout, sep = "\n", append = T)
+cat(paste("<Dataset name = \"", myrefid.xml,"\" interface = \"default\" >", sep = ""), file = fileout, sep = "\n", append = T)
+#cat("<Filter name = \"status\" value = \"KNOWN\"/>", file = fileout, sep = "\n", append = T)
+#cat("<Filter name = \"transcript_status\" value = \"KNOWN\"/>", file = fileout, sep = "\n", append = T)
+if (myrefid == "homo_sapiens") {
+  cat("<Filter name = \"biotype\" value = \"processed_transcript,protein_coding\"/>", file = fileout, sep = "\n", append = T)
+  cat("<Filter name = \"with_hgnc\" excluded = \"0\"/>", file = fileout, sep = "\n", append = T)
+} else {
+  cat("<Filter name = \"biotype\" value = \"protein_coding\"/>", file = fileout, sep = "\n", append = T)
+} 
+#### start attributes
+cat("<Attribute name = \"ensembl_gene_id\" />", file = fileout, sep = "\n", append = T)
+cat("<Attribute name = \"exon_chrom_start\" />", file = fileout, sep = "\n", append = T)
+cat("<Attribute name = \"exon_chrom_end\" />", file = fileout, sep = "\n", append = T)
+cat("<Attribute name = \"chromosome_name\" />", file = fileout, sep = "\n", append = T)
+#### end attributes
+cat("</Dataset>", file = fileout, sep = "\n", append = T)
+cat("</Query>", file = fileout, sep = "\n", append = T)
+
+## strand file
+fileout <- file.path(tmpfolder, "strand.xml")
+cat("<?xml version=\"1.0\" encoding=\"UTF-8\"?>", file = fileout, sep = "\n")
+cat("<Query  virtualSchemaName = \"default\" formatter = \"TSV\" header = \"0\" uniqueRows = \"0\" count = \"\" datasetConfigVersion = \"0.6\" >", file = fileout, sep = "\n", append = T)
+cat("", file = fileout, sep = "\n", append = T)
+cat(paste("<Dataset name = \"", myrefid.xml,"\" interface = \"default\" >", sep = ""), file = fileout, sep = "\n", append = T)
+#cat("<Filter name = \"status\" value = \"KNOWN\"/>", file = fileout, sep = "\n", append = T)
+#cat("<Filter name = \"transcript_status\" value = \"KNOWN\"/>", file = fileout, sep = "\n", append = T)
+if (myrefid == "homo_sapiens") {
+  cat("<Filter name = \"biotype\" value = \"processed_transcript,protein_coding\"/>", file = fileout, sep = "\n", append = T)
+  cat("<Filter name = \"with_hgnc\" excluded = \"0\"/>", file = fileout, sep = "\n", append = T)
+} else {
+  cat("<Filter name = \"biotype\" value = \"protein_coding\"/>", file = fileout, sep = "\n", append = T)
+} 
+#### start attributes
+cat("<Attribute name = \"ensembl_gene_id\" />", file = fileout, sep = "\n", append = T)
+cat("<Attribute name = \"strand\" />", file = fileout, sep = "\n", append = T)
+#### end attributes
+cat("</Dataset>", file = fileout, sep = "\n", append = T)
+cat("</Query>", file = fileout, sep = "\n", append = T)
+
+
+## paralogs file (if it exists)
+if (myrefid == "homo_sapiens") {
+  fileout <- file.path(tmpfolder, "paralogs.xml")
+  cat("<?xml version=\"1.0\" encoding=\"UTF-8\"?>", file = fileout, sep = "\n")
+  cat("<Query  virtualSchemaName = \"default\" formatter = \"TSV\" header = \"0\" uniqueRows = \"0\" count = \"\" datasetConfigVersion = \"0.6\" >", file = fileout, sep = "\n", append = T)
+  cat("", file = fileout, sep = "\n", append = T)
+  cat(paste("<Dataset name = \"", myrefid.xml,"\" interface = \"default\" >", sep = ""), file = fileout, sep = "\n", append = T)
+#  cat("<Filter name = \"status\" value = \"KNOWN\"/>", file = fileout, sep = "\n", append = T)
+#  cat("<Filter name = \"transcript_status\" value = \"KNOWN\"/>", file = fileout, sep = "\n", append = T)
+  if (myrefid == "homo_sapiens") {
+    cat("<Filter name = \"biotype\" value = \"processed_transcript,protein_coding\"/>", file = fileout, sep = "\n", append = T)
+    cat("<Filter name = \"with_hgnc\" excluded = \"0\"/>", file = fileout, sep = "\n", append = T)
+  } else {
+    cat("<Filter name = \"biotype\" value = \"protein_coding\"/>", file = fileout, sep = "\n", append = T)
+  } 
+  #### start attributes
+  cat("<Attribute name = \"ensembl_gene_id\" />", file = fileout, sep = "\n", append = T)
+  cat("<Attribute name = \"hsapiens_paralog_ensembl_gene\" />", file = fileout, sep = "\n", append = T)
+  #### end attributes
+  cat("</Dataset>", file = fileout, sep = "\n", append = T)
+  cat("</Query>", file = fileout, sep = "\n", append = T)
+}
+
+##  transcripts (eric the simulator)
+fileout <- file.path(tmpfolder, "transcripts.xml")
+cat("<?xml version=\"1.0\" encoding=\"UTF-8\"?>", file = fileout, sep = "\n")
+cat("<Query  virtualSchemaName = \"default\" formatter = \"TSV\" header = \"0\" uniqueRows = \"0\" count = \"\" datasetConfigVersion = \"0.6\" >", file = fileout, sep = "\n", append = T)
+cat("", file = fileout, sep = "\n", append = T)
+cat(paste("<Dataset name = \"", myrefid.xml,"\" interface = \"default\" >", sep = ""), file = fileout, sep = "\n", append = T)
+#cat("<Filter name = \"status\" value = \"KNOWN\"/>", file = fileout, sep = "\n", append = T)
+#cat("<Filter name = \"transcript_status\" value = \"KNOWN\"/>", file = fileout, sep = "\n", append = T)
+if (myrefid == "homo_sapiens") {
+  cat("<Filter name = \"biotype\" value = \"processed_transcript,protein_coding\"/>", file = fileout, sep = "\n", append = T)
+  cat("<Filter name = \"with_hgnc\" excluded = \"0\"/>", file = fileout, sep = "\n", append = T)
+} else {
+  cat("<Filter name = \"biotype\" value = \"protein_coding\"/>", file = fileout, sep = "\n", append = T)
+} 
+#### start attributes
+cat("<Attribute name = \"ensembl_gene_id\" />", file = fileout, sep = "\n", append = T)
+cat("<Attribute name = \"ensembl_transcript_id\" />", file = fileout, sep = "\n", append = T)
+cat("<Attribute name = \"exon_chrom_start\" />", file = fileout, sep = "\n", append = T)
+cat("<Attribute name = \"exon_chrom_end\" />", file = fileout, sep = "\n", append = T)
+cat("<Attribute name = \"chromosome_name\" />", file = fileout, sep = "\n", append = T)
+cat("<Attribute name = \"strand\" />", file = fileout, sep = "\n", append = T)
+#### end attributes
+cat("</Dataset>", file = fileout, sep = "\n", append = T)
+cat("</Query>", file = fileout, sep = "\n", append = T)
+
+fileout <- file.path(tmpfolder, "transcripts_cdna.xml")
+cat("<?xml version=\"1.0\" encoding=\"UTF-8\"?>", file = fileout, sep = "\n")
+cat("<Query  virtualSchemaName = \"default\" formatter = \"TSV\" header = \"0\" uniqueRows = \"0\" count = \"\" datasetConfigVersion = \"0.6\" >", file = fileout, sep = "\n", append = T)
+cat("", file = fileout, sep = "\n", append = T)
+cat(paste("<Dataset name = \"", myrefid.xml,"\" interface = \"default\" >", sep = ""), file = fileout, sep = "\n", append = T)
+#cat("<Filter name = \"status\" value = \"KNOWN\"/>", file = fileout, sep = "\n", append = T)
+#cat("<Filter name = \"transcript_status\" value = \"KNOWN\"/>", file = fileout, sep = "\n", append = T)
+if (myrefid == "homo_sapiens") {
+  cat("<Filter name = \"biotype\" value = \"processed_transcript,protein_coding\"/>", file = fileout, sep = "\n", append = T)
+  cat("<Filter name = \"with_hgnc\" excluded = \"0\"/>", file = fileout, sep = "\n", append = T)
+} else {
+  cat("<Filter name = \"biotype\" value = \"protein_coding\"/>", file = fileout, sep = "\n", append = T)
+} 
+#### start attributes
+cat("<Attribute name = \"ensembl_transcript_id\" />", file = fileout, sep = "\n", append = T)
+cat("<Attribute name = \"cdna\" />", file = fileout, sep = "\n", append = T)
+#### end attributes
+cat("</Dataset>", file = fileout, sep = "\n", append = T)
+cat("</Query>", file = fileout, sep = "\n", append = T)
+
+## download gene data
+system(paste("perl", file.path(ericscriptfolder, "lib", "perl", "retrievefrombiomart.pl"), file.path(tmpfolder, "genepos.xml"), ensversion, "| sort -u - >", file.path(tmpfolder, "genepos.txt")))
+system(paste("perl", file.path(ericscriptfolder, "lib", "perl", "retrievefrombiomart.pl"), file.path(tmpfolder, "geneinfo.xml"), ensversion, "| sort -u - >", file.path(tmpfolder, "geneinfo.txt")))
+system(paste("perl", file.path(ericscriptfolder, "lib", "perl", "retrievefrombiomart.pl"), file.path(tmpfolder, "exonstartend.xml"), ensversion, "| sort -u - >", file.path(tmpfolder, "exonstartend.txt")))
+system(paste("perl", file.path(ericscriptfolder, "lib", "perl", "retrievefrombiomart.pl"), file.path(tmpfolder, "strand.xml"), ensversion, "| sort -u - >", file.path(tmpfolder, "strand.txt")))
+system(paste("perl", file.path(ericscriptfolder, "lib", "perl", "retrievefrombiomart.pl"), file.path(tmpfolder, "transcripts.xml"), ensversion, "| sort -u - >", file.path(tmpfolder, "transcripts.txt")))
+system(paste("perl", file.path(ericscriptfolder, "lib", "perl", "retrievefrombiomart.pl"), file.path(tmpfolder, "transcripts_cdna.xml"), ensversion, ">", file.path(tmpfolder, "transcripts.fa")))
+if (myrefid == "homo_sapiens") {
+  system(paste("perl", file.path(ericscriptfolder, "lib", "perl", "retrievefrombiomart.pl"), file.path(tmpfolder, "paralogs.xml"), ensversion, "| sort -u - >", file.path(tmpfolder, "paralogs.txt")))
+  acc.chrs <- c(1:22, "X", "Y")
+  cat (acc.chrs, file = file.path(tmpfolder, "chrlist"), sep = "\n")
+}
+## download seq data
+download.file(file.path("ftp://ftp.ensembl.org/pub", paste("release-", ensversion, sep = ""), "fasta", myrefid, "dna", myrefid.path), destfile = file.path(tmpfolder, "seq.fa.gz"), quiet = T)
+
+
+
diff --git a/lib/R/EstimateSpanningReads.R b/lib/R/EstimateSpanningReads.R
new file mode 100644
index 0000000..ba41eb7
--- /dev/null
+++ b/lib/R/EstimateSpanningReads.R
@@ -0,0 +1,124 @@
+## EstimateSpanningReads v2: exclude soft clipped reads
+vars.tmp <- commandArgs()
+vars <- vars.tmp[length(vars.tmp)]
+split.vars <- unlist(strsplit(vars, ","))
+samplename <- split.vars [1]
+outputfolder <- split.vars[2]
+readlength <- max(as.numeric(split.vars[3]))
+load(file.path(outputfolder,"out",paste(samplename,".junctions.recalibrated.RData", sep = "")))
+load(file.path(outputfolder, "out", paste(samplename, ".ids_fasta.RData", sep = "")))
+load(file.path(outputfolder, "out", "isize.RData"))
+mysigma <- readlength/4
+myref <- sort(dnorm(seq(1, readlength, by = 1), readlength/2, mysigma), decreasing = T)
+DataMatrix <- matrix(NA, nrow = length(ids_fasta), ncol = 9)
+for (i in 1: length(ids_fasta)) {
+	junction.tmp <- junctions.recalibrated[i]
+	x <- system(paste("samtools view ", file.path(outputfolder, "aln", paste(samplename,".remap.recal.sorted.rmdup.bam", sep = ""))," ", ids_fasta[i],  ":", junction.tmp, "-", junction.tmp + 1, " | awk '($5>0)'  | cut -f 1,4,6 > ", file.path(outputfolder,"out",".spanningreads.sam"),sep = ""))
+	x <- system(paste("samtools view ", file.path(outputfolder, "aln", paste(samplename,".remap.recal.sorted.rmdup.bam", sep = ""))," ", ids_fasta[i]," | awk '($9>0)'  | cut -f 9 - > ", file.path(outputfolder,"out",".insertsize.sam"),sep = ""))
+	x <- system(paste("samtools view ", file.path(outputfolder, "aln", paste(samplename,".remap.recal.sorted.rmdup.bam", sep = ""))," ", ids_fasta[i]," | awk '($4<", junction.tmp, ") && ($8>",junction.tmp+1,")' | cut -f 1,4 -  > ", file.path(outputfolder,"out",".crossingreads.sam"),sep = ""))
+	
+	spanningreads.tmp <- scan(file.path(outputfolder,"out",".spanningreads.sam"), sep = "\t", what = list("", 1, ""), quiet = T)
+	ix.nosc <- sort(intersect(grep("^[0-9]*S", spanningreads.tmp[[3]], perl = T, invert = T), grep("[0-9]*S$", spanningreads.tmp[[3]], perl = T, invert = T)))
+	spanningreads <- vector("list", length = 3)
+  if (length(ix.nosc) > 0) {
+    spanningreads[[1]] <-  spanningreads.tmp[[1]][ix.nosc]   
+    spanningreads[[2]] <-  spanningreads.tmp[[2]][ix.nosc]   
+    spanningreads[[3]] <-  spanningreads.tmp[[3]][ix.nosc]   
+  } else {
+    spanningreads <- spanningreads.tmp
+  }
+	crossingreads <- scan(file.path(outputfolder,"out",".crossingreads.sam"), sep = "\t", what = list("", 1), quiet = T)
+	insert.size <- mean(abs(as.numeric(readLines(file.path(outputfolder,"out",".insertsize.sam"))))) - readlength
+	id.spanningreads <- spanningreads[[1]]
+	pos.spanningreads <- spanningreads[[2]]
+	id.crossingreads <- crossingreads[[1]]
+	pos.crossingreads <- crossingreads[[2]]
+	spanning.score <- 0
+	edge.score <- 0
+	range.pos.crossingreads <- 0
+	if (length(pos.crossingreads) > 0) {
+		range.pos.crossingreads <- junction.tmp - min(pos.crossingreads)
+	}
+	n.crossingreads <- length(which(id.crossingreads %in% id.spanningreads == F))
+	n.spanningreads <- 0
+	gjs <- 0
+	unique.score <- 0
+	us.prob <- 0
+	insertsize.score <- 0
+	if (length(pos.spanningreads) > 0) {
+		pos <- pos.spanningreads - junction.tmp + readlength
+		us.pos <- tabulate(pos, nbins = readlength)
+		
+		if (sum(us.pos) > 0) {
+			us.mult <- floor(sum(us.pos)/readlength)
+			us.residuals <- sum(us.pos)/readlength - floor(sum(us.pos)/readlength) 
+			us.refdistr <- rep(us.mult, readlength)
+			if (us.residuals > 0) {
+				for (kk in 1: (sum(us.pos) - us.mult*readlength)) {
+					us.refdistr[kk] <- us.refdistr[kk] + 1  
+				}
+			} else {
+				us.refdistr[1] <- us.refdistr[1] + 1
+				us.refdistr[2] <- us.refdistr[2] - 1
+			}
+			
+			ff <- which(sort(us.pos!=0))
+			us.prob <- 1- sum(abs(sort(us.pos)[ff]-sort(us.refdistr)[ff]))/sum(us.pos)
+		}
+
+		mynorm <- sum(myref[1:length(unique(pos))])
+		prob <- sum(dnorm(unique(pos),  readlength/2, mysigma))
+		gjs <- prob/mynorm
+		
+		
+		left.spanningreads <- pos.spanningreads[(pos.spanningreads <= (junction.tmp - round(readlength/3)))]
+		right.spanningreads <- pos.spanningreads[(pos.spanningreads > (junction.tmp - round(readlength/3)))]
+		n.left.spanningreads <- length(left.spanningreads )
+		n.right.spanningreads <- length(right.spanningreads)
+		spanning.score <- 1- abs(n.left.spanningreads - n.right.spanningreads)/(n.left.spanningreads + n.right.spanningreads)
+		
+		if (length(left.spanningreads) > 0) {
+			left.score <- mean((junction.tmp - readlength)-left.spanningreads)
+		} else {
+			left.score <- 0
+		}
+		if (length(right.spanningreads) > 0) {
+			right.score <- mean(right.spanningreads - junction.tmp)
+		} else {
+			right.score <- 0
+		}
+		edge.score <- 1 - 1.1^(mean(c(left.score,right.score)))
+		insertsize.score <- dnorm(insert.size, isize.mean, isize.sd)/dnorm(isize.mean, isize.mean, isize.sd)
+		n.spanningreads <- length(pos.spanningreads)
+		
+	}
+	
+	DataMatrix[i,] <- c(ids_fasta[i], n.crossingreads , insert.size, n.spanningreads, range.pos.crossingreads, edge.score, gjs, us.prob, insertsize.score)
+	
+}
+colnames(DataMatrix) <- c("id", "nreads","mean_ins_size","nreads_junc", "rangepos", "edgescore", "gjs", "uniformity.score", "isize.score")
+save(DataMatrix, file = file.path(outputfolder,"out",paste(samplename, ".DataMatrix.RData", sep = "")))
+
+filecon <- file(file.path(outputfolder,"out", paste(samplename, ".intervals", sep = "")), open = "w")
+ix.filter <- sort(unique(intersect(which(DataMatrix[,4] > 0), which(DataMatrix[,3] != "NaN"))))
+if (length(ix.filter) > 0) {
+  width <- 100
+  id.filtered <- ids_fasta[ix.filter]
+  save(id.filtered, file = file.path(outputfolder, "out",paste(samplename,".ids_filtered.RData", sep = "")))
+  for (i in 1:length(ix.filter)) {
+	  ix.ref <- ix.filter[i]
+	  junction <- junctions.recalibrated[ix.ref]
+	  pileup.interval <- seq.int((junction - (width/2 - 1)), (junction + (width/2)))
+	  pileup.interval[which(pileup.interval < 1)] <- 1
+	  pileup.interval <- unique(pileup.interval)
+	  cat(paste(rep(id.filtered[i], length(pileup.interval)), pileup.interval, sep = " "), file = filecon, sep = "\n", append = T)
+  }
+  close(filecon)
+  myflag <- 1
+	cat(myflag, file = file.path(outputfolder, "out", ".ericscript.flag"))
+} else {
+  myflag <- 0
+  cat(myflag, file = file.path(outputfolder, "out", ".ericscript.flag"))
+	stop("No chimeric transcripts found. Exit!")
+} 
+
diff --git a/lib/R/ExtractInsertSize.R b/lib/R/ExtractInsertSize.R
new file mode 100644
index 0000000..46831ca
--- /dev/null
+++ b/lib/R/ExtractInsertSize.R
@@ -0,0 +1,33 @@
+vars.tmp <- commandArgs()
+vars <- vars.tmp[length(vars.tmp)]
+split.vars <- unlist(strsplit(vars, ","))
+outputfolder <- split.vars[1]
+bwa_aln <- as.numeric(as.character(split.vars[2]))
+if (bwa_aln == 0) {
+  xx <- readLines(file.path(outputfolder, "out", ".ericscript.log"))
+  ix.isize <- grep("mean and std.dev:", xx)
+  if (length(ix.isize) > 0) {
+    isize.tmp <- strsplit(tail(xx[ix.isize], n = 1), ": ")[[1]][2]
+    isize.tmp1 <- unlist(strsplit(isize.tmp, ",", fixed = T))
+    isize.mean <- as.numeric(substr(isize.tmp1[1], 2, nchar(isize.tmp1[1])))
+    isize.sd <- as.numeric(substr(isize.tmp1[2], 1, nchar(isize.tmp1[2])-1))
+  } else {
+    isize.mean <- 200
+    isize.sd <- 40
+  }
+  save(isize.mean, isize.sd, file = file.path(outputfolder, "out", "isize.RData"))
+} else {
+  xx <- readLines(file.path(outputfolder, "out", ".ericscript.log"))
+  ix.isize <- grep("inferred external isize from", xx)
+  if (length(ix.isize) > 0) {
+    isize.tmp <- strsplit(tail(xx[ix.isize], n = 1), ": ")[[1]][2]
+    isize.tmp1 <- unlist(strsplit(isize.tmp, "+/-", fixed = T))
+    isize.mean <- as.numeric(isize.tmp1[1])
+    isize.sd <- as.numeric(isize.tmp1[2])
+  } else {
+    isize.mean <- 200
+    isize.sd <- 40
+  }
+  save(isize.mean, isize.sd, file = file.path(outputfolder, "out", "isize.RData"))
+  
+}
diff --git a/lib/R/ImportResults.R b/lib/R/ImportResults.R
new file mode 100644
index 0000000..860666a
--- /dev/null
+++ b/lib/R/ImportResults.R
@@ -0,0 +1,409 @@
+### ensgene converter
+
+toens <- function(ericscriptfolder, genename) {
+	
+	load(file.path(ericscriptfolder, "lib", "data", "EnsemblGene.GeneInfo.RData"))
+	ensgene <- as.character(EnsemblGene.GeneInfo$EnsemblGene)[which(as.character(EnsemblGene.GeneInfo$GeneName) == genename)]
+	if (length(ensgene) == 0) {ensgene <- NA}
+	return(ensgene)
+	
+}
+
+
+convertToComplement<-function(x) {
+	
+	bases=c("A","C","G","T")
+	xx<-unlist(strsplit(toupper(x), NULL))
+	paste(unlist(lapply(xx, function(bbb) {
+						if(bbb=="A") compString <- "T"
+						if(bbb=="C") compString <- "G"
+						if(bbb=="G") compString <- "C"
+						if(bbb=="T") compString <- "A"
+						if(!bbb %in% bases) compString <- "N"
+						return(compString)
+						})),collapse="")
+	
+}
+
+### import results from algorithm''s output
+
+Import_ericscript <- function(outputpath) {
+	
+	filename <- grep(".results.total.tsv", list.files(outputpath), value = T)
+	
+	if (length(filename) == 1) {
+		xx <- read.delim(file.path(outputpath, filename), sep = "\t", header = T)
+		gene5 <- as.character(xx$EnsemblGene1)
+		gene3 <- as.character(xx$EnsemblGene2)
+		nreads <- as.numeric(as.character(xx$spanningreads))
+		score <- as.numeric(as.character(xx$EricScore))
+		seq <- as.character(xx$JunctionSequence)
+		
+		algout <- list()
+		algout$gene5 <- gene5
+		algout$gene3 <- gene3
+		algout$nreads <- nreads
+		algout$score <- score
+		algout$seq <- seq
+		
+		return(algout)
+	} else if (length(filename) == 0) {
+		
+		return(0)    
+		
+	} else if (length(filename) > 1) {
+		
+		return(length(filename))
+		
+	}
+    
+	
+}
+
+
+
+Import_defuse <- function(outputpath) {
+	
+	filename <- grep(".classify.tsv", list.files(outputpath), value = T)
+    
+	if (length(filename) == 1) {
+		
+		xx <- read.delim(file.path(outputpath, filename), sep = "\t", header = T)
+		gene5 <- as.character(xx$gene1)
+		gene3 <- as.character(xx$gene2)
+		nreads <- as.numeric(as.character(xx$splitr_count))
+		score <- as.numeric(as.character(xx$probability))
+		seq <- rep("", dim(xx)[1])
+		for (seqd in 1: dim(xx)[1]) {
+			tmp <-  unlist(strsplit(as.character(xx$splitr_sequence[seqd]), "|", fixed = T))
+			seq[seqd] <- paste(substr(tmp[1], (nchar(tmp[1])-29), nchar(tmp[1])), substr(tmp[2], 1, 30), sep = "")
+		}
+		
+		
+		algout <- list()
+		algout$gene5 <- gene5
+		algout$gene3 <- gene3
+		algout$nreads <- nreads
+		algout$score <- score
+		algout$seq <- seq
+		
+		return(algout)
+	} else if (length(filename) == 0) {
+		
+		return(0)    
+		
+	} else if (length(filename) > 1) {
+		
+		return(length(filename))
+		
+	}
+	
+}
+
+
+Import_chimerascan <- function(outputpath) {
+	
+	filename <- grep("chimeras.bedpe", list.files(outputpath), value = T)
+	
+	if (length(filename) == 1) {
+		
+		xx <- read.delim(file.path(outputpath, filename), sep = "\t", header = T)
+		gene1tmp <- as.character(xx$genes5p)
+		gene2tmp <- as.character(xx$genes3p)
+		nreadstmp <- as.numeric(as.character(xx$total_frags))
+		scoretmp <- as.numeric(as.character(xx$score))
+		
+		gene1 <- c()
+		gene2 <- c()
+		nreads <- c()
+		score <- c()
+		
+		for (i in 1: length(gene1tmp)) {
+			
+			if((length(grep(",", gene1tmp[i])) > 0) & (length(grep(",", gene2tmp[i])) == 0)) {
+				
+				gene1 <- c(gene1, unlist(strsplit(gene1tmp[i], ",")))
+				myrep <- length(unlist(strsplit(gene1tmp[i], ",")))
+				gene2 <- c(gene2, rep(gene2tmp[i], myrep))
+				nreads <- c(nreads, rep(nreadstmp[i], myrep))
+				score <- c(score, rep(scoretmp[i], myrep))  
+				
+			} else if ((length(grep(",", gene1tmp[i])) == 0) & (length(grep(",", gene2tmp[i])) > 0)) {
+				
+				gene2 <- c(gene2, unlist(strsplit(gene2tmp[i], ",")))
+				myrep <- length(unlist(strsplit(gene2tmp[i], ",")))
+				gene1 <- c(gene1, rep(gene1tmp[i], myrep))
+				nreads <- c(nreads, rep(nreadstmp[i], myrep))
+				score <- c(score, rep(scoretmp[i], myrep))  
+				
+			} else if ((length(grep(",", gene1tmp[i])) > 0) & (length(grep(",", gene2tmp[i])) > 0)) {
+				
+				gene1tmp1 <- unlist(strsplit(gene1tmp[i], ","))
+				gene2tmp1 <- unlist(strsplit(gene2tmp[i], ","))
+				myrep1 <- length(unlist(strsplit(gene1tmp[i], ",")))
+				myrep2 <- length(unlist(strsplit(gene2tmp[i], ",")))     
+				
+				for (j in 1: myrep1) {
+					
+					gene1 <- c(gene1, rep(gene1tmp1[j], myrep2))
+					gene2 <- c(gene2, gene2tmp1)
+					nreads <- c(nreads, rep(nreadstmp[i], myrep2))
+					score <- c(score, rep(scoretmp[i], myrep2))
+					
+				}
+				
+			} else {
+				
+				gene1 <- c(gene1, gene1tmp[i])
+				gene2 <- c(gene2, gene2tmp[i])
+				nreads <- c(nreads, nreadstmp[i])
+				score <- c(score, scoretmp[i])
+				
+			}
+			
+		}
+		
+		seq <- rep(NA, length(gene1))
+		gene5 <- rep(NA, length(gene1))
+		gene3 <- rep(NA, length(gene1))
+		
+		for (i in 1: length(gene1)) {
+			
+			gene5[i] <- toens(ericscriptfolder, gene1[i])
+			gene3[i] <- toens(ericscriptfolder, gene2[i])
+			
+		}  
+		
+		algout <- list()
+		algout$gene5 <- gene5
+		algout$gene3 <- gene3
+		algout$nreads <- nreads
+		algout$score <- score
+		algout$seq <- seq
+		
+		return(algout)
+	} else if (length(filename) == 0) {
+		
+		return(0)    
+		
+	} else if (length(filename) > 1) {
+		
+		return(length(filename))
+		
+	}
+	
+}
+
+
+
+Import_shortfuse <- function(outputpath) {
+	
+	filename <- grep("fusion_counts.bedpe", list.files(outputpath), value = T)
+	
+	if (length(filename) == 1) {
+		
+		xx <- read.delim(file.path(outputpath, filename), sep = "\t", header = F)
+		gene1tmp <- as.character(xx[, 11])
+		gene2tmp <- as.character(xx[, 12])
+		nreadstmp <- as.numeric(as.character(xx[, 8]))
+		scoretmp <- as.numeric(as.character(xx[, 8]))
+		
+		gene12 <- paste(gene1tmp, gene2tmp)
+		ixNOdupgene <- which(duplicated(gene12) == F)
+		
+		gene5 <- rep(NA, length(ixNOdupgene))
+		gene3 <- rep(NA, length(ixNOdupgene))
+		
+		for (i in 1: length(ixNOdupgene)) {
+			
+			gene5[i] <- toens(ericscriptfolder, gene1tmp[ixNOdupgene[i]])
+			gene3[i] <- toens(ericscriptfolder, gene2tmp[ixNOdupgene[i]])
+			
+		}  
+		
+		nreads <- nreadstmp[ixNOdupgene]
+		score <- scoretmp[ixNOdupgene]
+		seq <- rep(NA, length(nreads))
+		
+		algout <- list()
+		algout$gene5 <- gene5
+		algout$gene3 <- gene3
+		algout$nreads <- nreads
+		algout$score <- score
+		algout$seq <- seq
+		
+		return(algout)
+	} else if (length(filename) == 0) {
+		
+		return(0)    
+		
+	} else if (length(filename) > 1) {
+		
+		return(length(filename))
+		
+	}
+	
+}
+
+
+
+
+Import_fusionmap <- function(outputpath) {
+	
+	filename <- grep("FusionReport.txt", list.files(outputpath), value = T)
+	
+	if (length(filename) == 1) {
+		
+		xx <- read.delim(file.path(outputpath, filename), sep = "\t", header = T)
+		
+		transcriptversetmp <- as.character(xx$FusionGene)
+		genelisttmp <- unlist(strsplit(transcriptversetmp, "->"))
+		gene1tmp <- genelisttmp[seq(1, length(genelisttmp), by = 2)]
+		gene2tmp <- genelisttmp[seq(2, length(genelisttmp), by = 2)]
+		gene1tmp1 <- as.character(xx$KnownGene1)
+		gene2tmp1 <- as.character(xx$KnownGene2)
+		seqtmp <- as.character(xx$FusionJunctionSequence)
+		ix.reverse <- which(gene1tmp %in% gene1tmp1 == F)
+		for (ii in 1: length(ix.reverse)) {  
+			seqtmp[ix.reverse[ii]] <- convertToComplement(reverse(seqtmp[ix.reverse[ii]]))
+		}
+		nreadstmp <- as.numeric(as.character(xx[, 2]))
+		scoretmp <- as.numeric(as.character(xx[, 2]))
+		
+		gene1 <- c()
+		gene2 <- c()
+		nreads <- c()
+		score <- c()
+		seq <- c()
+		
+		for (i in 1: length(gene1tmp)) {
+			
+			if((length(grep(",", gene1tmp[i])) > 0) & (length(grep(",", gene2tmp[i])) == 0)) {
+				
+				gene1 <- c(gene1, unlist(strsplit(gene1tmp[i], ",")))
+				myrep <- length(unlist(strsplit(gene1tmp[i], ",")))
+				gene2 <- c(gene2, rep(gene2tmp[i], myrep))
+				nreads <- c(nreads, rep(nreadstmp[i], myrep))
+				score <- c(score, rep(scoretmp[i], myrep))  
+				seq <- c(seq, rep(seqtmp[i], myrep))
+				
+			} else if ((length(grep(",", gene1tmp[i])) == 0) & (length(grep(",", gene2tmp[i])) > 0)) {
+				
+				gene2 <- c(gene2, unlist(strsplit(gene2tmp[i], ",")))
+				myrep <- length(unlist(strsplit(gene2tmp[i], ",")))
+				gene1 <- c(gene1, rep(gene1tmp[i], myrep))
+				nreads <- c(nreads, rep(nreadstmp[i], myrep))
+				score <- c(score, rep(scoretmp[i], myrep))  
+				seq <- c(seq, rep(seqtmp[i], myrep))
+				
+				
+			} else if ((length(grep(",", gene1tmp[i])) > 0) & (length(grep(",", gene2tmp[i])) > 0)) {
+				
+				gene1tmp1 <- unlist(strsplit(gene1tmp[i], ","))
+				gene2tmp1 <- unlist(strsplit(gene2tmp[i], ","))
+				myrep1 <- length(unlist(strsplit(gene1tmp[i], ",")))
+				myrep2 <- length(unlist(strsplit(gene2tmp[i], ",")))     
+				
+				for (j in 1: myrep1) {
+					
+					gene1 <- c(gene1, rep(gene1tmp1[j], myrep2))
+					gene2 <- c(gene2, gene2tmp1)
+					nreads <- c(nreads, rep(nreadstmp[i], myrep2))
+					score <- c(score, rep(scoretmp[i], myrep2))     
+					seq <- c(seq, rep(seqtmp[i], myrep2))
+					
+				}
+				
+			} else {
+				
+				gene1 <- c(gene1, gene1tmp[i])
+				gene2 <- c(gene2, gene2tmp[i])
+				nreads <- c(nreads, nreadstmp[i])
+				score <- c(score, scoretmp[i])
+				seq <- c(seq, seqtmp[i])
+				
+			}
+			
+		}
+		
+		gene5 <- rep(NA, length(gene1))
+		gene3 <- rep(NA, length(gene1))
+		
+		for (i in 1: length(gene1)) {
+			
+			gene5[i] <- toens(ericscriptfolder, gene1[i])
+			gene3[i] <- toens(ericscriptfolder, gene2[i])
+			
+		}  
+		
+		algout <- list()
+		algout$gene5 <- gene5
+		algout$gene3 <- gene3
+		algout$nreads <- nreads
+		algout$score <- score
+		algout$seq <- seq
+		
+		return(algout)
+	} else if (length(filename) == 0) {
+		
+		return(0)    
+		
+	} else if (length(filename) > 1){
+		
+		return(length(filename))
+		
+	}
+	
+}
+
+
+
+
+Import_unknown <- function(outputpath) {
+	
+	filename <- grep("ericsim", list.files(outputpath), value = T)
+	
+	if (length(filename) == 1) {
+		
+		xx <- read.delim(file.path(outputpath, filename), sep = "\t", header = T)
+		
+		gene5 <- as.character(xx$gene5)
+		gene3 <- as.character(xx$gene3)
+		nreads <- as.numeric(as.character(xx$nread))
+		score <- as.numeric(as.character(xx$score))
+		seq <- as.character(x$seq)
+		
+		algout <- list()
+		algout$gene5 <- gene5
+		algout$gene3 <- gene3
+		algout$nreads <- nreads
+		algout$score <- score
+		algout$seq <- seq
+		
+		return(algout)
+		
+	} else if (length(filename) == 0) {
+		
+		return(0)    
+		
+	} else if (length(filename) > 1) {
+		
+		return(length(filename))
+		
+	}
+	
+	
+	
+}
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/lib/R/MakeAdjacencyMatrix.R b/lib/R/MakeAdjacencyMatrix.R
new file mode 100644
index 0000000..0860369
--- /dev/null
+++ b/lib/R/MakeAdjacencyMatrix.R
@@ -0,0 +1,119 @@
+vars.tmp <- commandArgs()
+vars <- vars.tmp[length(vars.tmp)]
+split.vars <- unlist(strsplit(vars, ","))
+samplename <- split.vars [1]
+outputfolder <- split.vars[2]
+ericscriptfolder <- split.vars[3]
+minreads <- as.numeric(split.vars[4])
+MAPQ <-  as.numeric(split.vars[5])
+refid <- as.character(split.vars[6])
+dbfolder <- as.character(split.vars[7])
+
+filein <- file.path(outputfolder, "out", paste(samplename, ".filtered.out", sep = ""))
+xx <- readLines(filein, n = 1)
+if (length(xx) == 0) {
+  myflag <- 0
+  cat(myflag, file = file.path(outputfolder, "out", ".ericscript.flag"))
+  stop("No lines available in ",filein,". No discordant reads found with MAPQ set to ", MAPQ, ". Try to decrease MAPQ parameter and run again EricScript. Exit!")
+} else {
+  myflag <- 1
+  cat(myflag, file = file.path(outputfolder, "out", ".ericscript.flag"))
+}
+x <- read.delim(filein, sep = "\t", header = F)
+load(file.path(dbfolder,"data", refid, "EnsemblGene.GenePosition.RData"))
+flag <- x[,1]
+#ix.flag <- which((flag > 63 & flag < 70) | (flag > 95 & flag < 118) | flag == 161 | flag == 181) 
+ix.flag <- which((flag > 63 & flag < 70) | (flag > 95 & flag < 112) | flag == 161) 
+id_1 <- as.character(x[ix.flag,2])
+id_2 <- as.character(x[ix.flag,5])
+pos_1 <- as.numeric(as.character(x[ix.flag,3]))
+pos_2 <- as.numeric(as.character(x[ix.flag,6]))
+rm(x)
+genename <- as.character(EnsemblGene.GenePosition$EnsemblGene)
+id1 <- c()
+id2 <- c()
+nreads <- c()
+diffpos <- c()
+generef <- unique(id_1)
+for (i in 1: length(generef)) {
+  ix.generef <- which(genename == generef[i])	
+  ix.gene <- which(id_1 == generef[i])
+  tmp <- sort(summary(as.factor(id_2[ix.gene]), maxsum = length(unique(id_2[ix.gene]))),  decreasing = T)
+  tmp.genename <- names(tmp)
+  tmp.weight <- as.numeric(tmp)
+  if ((max(tmp.weight) >= minreads) & (length(which(tmp.weight >= minreads)) <= 10)) {
+    ix.maxnodes <- which(tmp.weight >= minreads)
+    tmp.genename <- tmp.genename[ix.maxnodes]
+    tmp.weight <- tmp.weight[ix.maxnodes]
+    for (j in 1:length(tmp.weight)) {
+      ix.genelink <- which(genename == tmp.genename[j])
+      if (length(ix.genelink)!=0) {
+        id1 <- c(id1, generef[i])
+        id2 <- c(id2, tmp.genename[j])
+        nreads <- c(nreads, tmp.weight[j])
+        diffpos <- c(diffpos, abs(ix.generef-ix.genelink))
+      }
+    }
+  } 
+  
+}
+## filter paralogs if paralogs exist
+
+if (file.exists(file.path(dbfolder,"data", refid, "EnsemblGene.Paralogs.RData"))) {
+  
+  load(file.path(dbfolder, "data", refid, "EnsemblGene.Paralogs.RData"))  
+  paralogs.flag <- rep(0, length(id1))
+  
+  if (length(id1) == 0) {
+    myflag <- 0
+    cat(myflag, file = file.path(outputfolder, "out", ".ericscript.flag"))
+    stop("No discordant reads found with minimum reads set to ", minreads, ". Exit!")
+  }
+  for (i in 1: length(id1)) {
+    ix.paralogs <- which(EnsemblGene.Paralogs$EnsemblGene == id1[i])
+    paralogs <- as.character(EnsemblGene.Paralogs$Paralogs[ix.paralogs])
+    if (length(grep(id2[i], paralogs)) > 0) {
+      paralogs.flag[i] <- 1
+    }
+  }
+  ##
+  paralogs.filter <- which(paralogs.flag == 0)
+  id1f <- id1[paralogs.filter]
+  id2f <- id2[paralogs.filter]
+  nreadsf <- nreads[paralogs.filter]
+  diffposf <- diffpos[paralogs.filter]
+  if (length(id1) == 0) {
+    myflag <- 0
+    cat(myflag, file = file.path(outputfolder, "out", ".ericscript.flag"))
+    stop("No discordant reads found with minimum reads set to ",minreads,". Exit!")
+  } 
+  nfus <- length(id1f)
+  MyGF <- vector("list", 6)
+  names(MyGF) <- c("id1", "id2", "nreads", "pos1", "pos2", "diffpos")
+  MyGF$id1 <- id1f
+  MyGF$id2 <- id2f
+  MyGF$nreads <- nreadsf
+  MyGF$diffpos <- diffposf
+  MyGF$pos1 <- vector("list", nfus)
+  MyGF$pos2 <- vector("list", nfus)
+  for (i in 1: (nfus)) {
+    ix.pos <- which((id_1 == id1f[i]) & (id_2 ==id2f[i]))
+    MyGF$pos1[[i]] <- pos_1[ix.pos]
+    MyGF$pos2[[i]] <- pos_2[ix.pos]
+  }
+} else {
+  
+  nfus <- length(id1)
+  MyGF <- vector("list", 6)
+  names(MyGF) <- c("id1", "id2", "nreads", "pos1", "pos2", "diffpos")
+  MyGF$id1 <- id1
+  MyGF$id2 <- id2
+  MyGF$nreads <- nreads
+  MyGF$diffpos <- diffpos
+  MyGF$pos1 <- pos_1
+  MyGF$pos2 <- pos_2
+  
+}
+
+save(MyGF, file = file.path(outputfolder, "out", paste(samplename, ".chimeric.RData", sep = "")))
+
diff --git a/lib/R/MakeEmptyResults.R b/lib/R/MakeEmptyResults.R
new file mode 100644
index 0000000..00781a7
--- /dev/null
+++ b/lib/R/MakeEmptyResults.R
@@ -0,0 +1,27 @@
+## MakeResults.R v0.2
+## different read count-based method for gene expression level estimation
+## added machine-learning based algorithm as summarization score
+vars.tmp <- commandArgs()
+vars <- vars.tmp[length(vars.tmp)]
+split.vars <- unlist(strsplit(vars, ","))
+samplename <- split.vars [1]
+outputfolder <- split.vars[2]
+
+Results <- "No Chimeric Transcript found!"
+write.table(Results, file = file.path(outputfolder,paste(samplename,".results.total.tsv", sep = "")), sep = "\t", row.names = F, col.names = F, quote = F)
+	
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/lib/R/MakeResults.R b/lib/R/MakeResults.R
new file mode 100644
index 0000000..7cb5155
--- /dev/null
+++ b/lib/R/MakeResults.R
@@ -0,0 +1,485 @@
+## MakeResults.R v0.5
+## different read count-based method for gene expression level estimation
+## added machine-learning based algorithm as summarization score
+## new method to retrieve genomic coordinates
+## genome reference from db data
+## edited gene fusions separator 
+
+vars.tmp <- commandArgs()
+vars <- vars.tmp[length(vars.tmp)]
+split.vars <- unlist(strsplit(vars, ","))
+samplename <- split.vars [1]
+outputfolder <- split.vars[2]
+ericscriptfolder <- split.vars[3]
+readlength <- as.numeric(split.vars[4])
+verbose <- as.numeric(split.vars[5])
+refid <- as.character(split.vars [6])
+dbfolder <- as.character(split.vars[7])
+#genomeref <- as.character(split.vars[8])
+
+flag.ada <- require(ada, quietly = T)
+if (flag.ada == F) {
+  require(kernlab, quietly = T)  
+}
+load(file.path(outputfolder,"out",paste(samplename,".chimeric.RData", sep = "")))
+load(file.path(outputfolder,"out",paste(samplename,".DataMatrix.RData", sep = "")))
+load(file.path(outputfolder,"out", paste(samplename,".ids_homology.RData", sep  = "")))
+load(file.path(dbfolder, "data", refid, "EnsemblGene.GeneInfo.RData"))
+load(file.path(dbfolder, "data", refid, "EnsemblGene.Structures.RData"))
+load(file.path(dbfolder, "data", refid, "EnsemblGene.GeneNames.RData"))
+load(file.path(dbfolder, "data", refid, "EnsemblGene.Sequences.RData"))
+load(file.path(ericscriptfolder, "lib","data", "_resources", "BlackList.RData"))
+load(file.path(ericscriptfolder, "lib","data", "_resources", "DataModel.RData"))
+genomeref <- file.path(dbfolder, "data", refid, "allseq.fa")
+  
+checkselfhomology.fa <- scan(file = file.path(outputfolder,"out", paste(samplename, ".checkselfhomology.fa", sep = "")), sep= "\n", what = "", quiet = T)
+seq.length <- nchar(checkselfhomology.fa)[2]
+ix.id.fa <- which(checkselfhomology.fa %in% paste(">",info.id.and.homology[,1], sep = ""))
+ix.fa <- ix.id.fa + 1
+left_junction <- substr(checkselfhomology.fa[ix.fa], 1, (seq.length/2))
+right_junction <- substr(checkselfhomology.fa[ix.fa], (seq.length/2 + 1), seq.length)
+tmp_id <- unlist(strsplit(checkselfhomology.fa[ix.id.fa], "----", fixed = T))
+left_id <- tmp_id[seq(1, length(ix.id.fa)*2, by = 2)]
+right_id <- paste(">",tmp_id[seq(2, length(ix.id.fa)*2, by = 2)], sep = "")
+exone.tmp <- unlist(strsplit(as.character(DataMatrix[,1]), "----", fixed = T))
+exone.tmp1 <- exone.tmp[seq(1, dim(DataMatrix)[1]*2, by = 2)]
+exone.tmp2 <- exone.tmp[seq(2, dim(DataMatrix)[1]*2, by = 2)]
+ensgenename1 <- unlist(strsplit(exone.tmp1, "_"))[seq(1, dim(DataMatrix)[1]*2, by = 2)]
+ensgenename2 <- unlist(strsplit(exone.tmp2, "_"))[seq(1, dim(DataMatrix)[1]*2, by = 2)]
+ix.idd <- which(DataMatrix[,1] %in% info.id.and.homology[,1])
+DataMatrixF <- DataMatrix[ix.idd ,]
+homology <- info.id.and.homology[,2]
+flag.dup.a <- info.id.and.homology[,3]
+flag.dup.b <- info.id.and.homology[,4]
+geneinfo.id <- as.character(EnsemblGene.Structures$EnsemblGene)
+geneinfo.chr <- as.character(EnsemblGene.Structures$Chromosome)
+geneinfo.start <- as.character(EnsemblGene.Structures$geneStart)
+geneinfo.end <- as.character(EnsemblGene.Structures$geneEnd)
+geneinfo.description <- as.character(EnsemblGene.GeneInfo$Description)
+geneinfo.genename <- as.character(EnsemblGene.GeneInfo$GeneName)
+geneinfo.strand <- as.character(EnsemblGene.Structures$Strand)
+ensgenename12 <- as.character(DataMatrixF[,1])
+exone.tmp <- unlist(strsplit(as.character(DataMatrixF[,1]), "----", fixed = T))
+exone.tmp1 <- exone.tmp[seq(1, dim(DataMatrixF)[1]*2, by = 2)]
+exone.tmp2 <- exone.tmp[seq(2, dim(DataMatrixF)[1]*2, by = 2)]
+ensgenename1 <- unlist(strsplit(exone.tmp1, "_"))[seq(1, dim(DataMatrixF)[1]*2, by = 2)]
+ensgenename2 <- unlist(strsplit(exone.tmp2, "_"))[seq(1, dim(DataMatrixF)[1]*2, by = 2)]
+geneid1 <- rep("",length(ensgenename1))
+geneid2 <- rep("",length(ensgenename1))
+description1 <- rep("",length(ensgenename1))
+description2 <- rep("",length(ensgenename1))
+status1 <- rep("",length(ensgenename1))
+status2 <- rep("",length(ensgenename1))
+chr1 <- rep("", length(ensgenename1))
+chr2 <- rep("", length(ensgenename1))
+genestart1 <- rep("", length(ensgenename1))
+genestart2 <- rep("", length(ensgenename1))
+geneend1 <- rep("", length(ensgenename1))
+geneend2 <- rep("", length(ensgenename1))
+biotype1 <- rep("", length(ensgenename1))
+biotype2 <- rep("", length(ensgenename1))
+strand1 <- rep("", length(ensgenename1))
+strand2 <- rep("", length(ensgenename1))
+
+for (i in 1:length(ensgenename1)) {
+  ix.gene1 <- which(geneinfo.id == ensgenename1[i])
+  ix.gene1.info <- which(EnsemblGene.GeneInfo$EnsemblGene == ensgenename1[i])
+  if (length(ix.gene1)!=0) {
+    geneid1[i] <- geneinfo.genename[ix.gene1.info]
+    description1[i] <- geneinfo.description[ix.gene1.info]
+    chr1[i] <- geneinfo.chr[ix.gene1]
+    strand1[i] <- geneinfo.strand[ix.gene1]
+    genestart1[i] <- geneinfo.start[ix.gene1]
+    geneend1[i] <- geneinfo.end[ix.gene1]
+  }
+  ix.gene2 <- which(geneinfo.id == ensgenename2[i])
+  ix.gene2.info <- which(EnsemblGene.GeneInfo$EnsemblGene == ensgenename2[i])
+  if (length(ix.gene2)!=0) {
+    geneid2[i] <- geneinfo.genename[ix.gene2.info]
+    description2[i] <- geneinfo.description[ix.gene2.info]
+    chr2[i] <- geneinfo.chr[ix.gene2]
+    strand2[i] <- geneinfo.strand[ix.gene2]
+    genestart2[i] <- geneinfo.start[ix.gene2]
+    geneend2[i] <- geneinfo.end[ix.gene2]
+    
+  }
+}
+
+exone1 <- unlist(strsplit(exone.tmp1, "_"))[seq(2, dim(DataMatrixF)[1]*2, by = 2)]
+exone2 <- unlist(strsplit(exone.tmp2, "_"))[seq(2, dim(DataMatrixF)[1]*2, by = 2)]
+genename1 <- geneid1
+genename2 <- geneid2
+diff.adj.tot <- rep(NA, length(ensgenename1))
+n.crossing <- rep(NA, length(ensgenename1))
+for (hh in 1: length(ensgenename1)) {
+  hhix <- which(MyGF$id1 == ensgenename1[hh] & MyGF$id2 == ensgenename2[hh])
+  diff.adj.tot[hh] <- MyGF$diffpos[hhix]
+  n.crossing[hh] <- MyGF$nreads[hhix]
+}
+n.spanning <- as.numeric(as.character(DataMatrixF[,4])) 
+edge.score <- round(as.numeric(as.character(DataMatrixF[,6])), digits = 4)
+gjs.score <- round(as.numeric(as.character(DataMatrixF[,7])), digits = 4)
+unique.score <- round(as.numeric(as.character(DataMatrixF[,8])), digits = 4)
+isize.score <- round(as.numeric(as.character(DataMatrixF[,9])), digits = 4)
+ins.size <- round(as.numeric(as.character(DataMatrixF[,3])), digits = 2)
+adj <- rep("Not Adjacent", length(ensgenename1))
+adj[which(diff.adj.tot == 1)] <- "Adjacent"
+if (refid == "homo_sapiens") {
+  
+  acceptable.chrs <- c(seq.int(1,22), "X","Y")
+  ix.chr1 <- which(chr1 %in% acceptable.chrs)
+  ix.chr2 <- which(chr2 %in% acceptable.chrs)
+  ix.chr.tmp1 <- intersect(ix.chr1, ix.chr2)
+  ix.chr.tmp2 <- intersect(grep("NNN", right_junction, invert = T), grep("NNN", left_junction, invert = T))
+  ix.chr <- intersect(ix.chr.tmp1, ix.chr.tmp2)
+  
+  
+  if (length(ix.chr) > 0) {
+    genename1 <- genename1[ix.chr]
+    genename2 <- genename2[ix.chr]
+    chr1 <- chr1[ix.chr]
+    chr2 <- chr2[ix.chr]
+    genestart1 <- genestart1[ix.chr]
+    genestart2 <- genestart2[ix.chr]
+    geneend1 <- geneend1[ix.chr]
+    geneend2 <- geneend2[ix.chr]
+    exone1 <- exone1[ix.chr]
+    exone2 <- exone2[ix.chr]
+    ensgenename1 <- ensgenename1[ix.chr]
+    ensgenename12 <- ensgenename12[ix.chr]
+    ensgenename2 <- ensgenename2[ix.chr]
+    DataMatrixF <- rbind(DataMatrixF[ix.chr,])
+    homology <- homology[ix.chr]
+    flag.dup.a <- flag.dup.a[ix.chr]
+    flag.dup.b <- flag.dup.b[ix.chr]
+    adj <- adj[ix.chr]
+    n.spanning <- n.spanning[ix.chr]
+    n.crossing <- n.crossing[ix.chr]
+    edge.score <- edge.score[ix.chr]
+    unique.score <- unique.score[ix.chr]
+    gjs.score <- gjs.score[ix.chr]
+    isize.score <- isize.score[ix.chr]
+    ins.size <- ins.size[ix.chr]
+    biotype1 <- biotype1[ix.chr]
+    biotype2 <- biotype2[ix.chr]
+    status1 <- status1[ix.chr]
+    status2 <- status2[ix.chr]
+    description1 <- description1[ix.chr]
+    description2 <- description2[ix.chr]
+    left_id <- left_id[ix.chr]
+    left_junction <- left_junction[ix.chr]
+    right_id <- right_id[ix.chr]
+    right_junction <- right_junction[ix.chr]
+    strand1 <- strand1[ix.chr]
+    strand2 <- strand2[ix.chr]
+  }
+}
+stats <- read.delim(file.path(outputfolder, "out", paste(samplename, ".stats", sep = "")), sep = "\t", header = F)
+gene.stats <- as.character(stats[,1])
+length.stats <- as.numeric(stats[,2])
+nreads.stats <- as.numeric(stats[,3])
+gene1.exp <- rep(0, length(ensgenename1))
+gene2.exp <- rep(0, length(ensgenename1))
+gene12.exp <- round(((as.numeric(DataMatrixF[,2]) + as.numeric(DataMatrixF[,4]))*readlength/as.numeric(DataMatrixF[,5])), digits = 2)
+#gene12.exp <- rep(0, length(ensgenename1))
+for (iexpr in 1: length(ensgenename1)) {
+  ix1.stats <- which(gene.stats == ensgenename1[iexpr])
+  ix2.stats <- which(gene.stats == ensgenename2[iexpr])
+  #    ix12.stats <- which(gene.stats == ensgenename12[iexpr])
+  gene1.exp[iexpr] <- round(nreads.stats[ix1.stats]*readlength/length.stats[ix1.stats], digits = 2)
+  gene2.exp[iexpr] <- round(nreads.stats[ix2.stats]*readlength/length.stats[ix2.stats], digits = 2)
+  #    gene12.exp[iexpr] <- round(nreads.stats[ix12.stats]*readlength/length.stats[ix12.stats], digits = 2)
+}
+
+# NEW Find GenomicPosition (50nt)
+for (i in 1: length(ensgenename1)) {
+  if (i == 1) {
+    cat(paste("@", i, "_", 1, "\n", left_junction[i], "\n+\n", gsub(", ", "", toString(rep("I", nchar(left_junction[i])))), "\n", "@", i, "_", 2, "\n", right_junction[i],"\n+\n", gsub(", ", "", toString(rep("I", nchar(right_junction[i])))), sep = ""), sep = "\n", file = file.path(outputfolder, "out", "findgenomicpos.fq"), append = F)    
+  } else {
+    cat(paste("@", i, "_", 1, "\n", left_junction[i], "\n+\n", gsub(", ", "", toString(rep("I", nchar(left_junction[i])))), "\n", "@", i, "_", 2, "\n", right_junction[i],"\n+\n", gsub(", ", "", toString(rep("I", nchar(right_junction[i])))), sep = ""), sep = "\n", file = file.path(outputfolder, "out", "findgenomicpos.fq"), append = T)  
+  }
+}
+system(paste("bwa aln", "-R 50", genomeref, file.path(outputfolder, "out", "findgenomicpos.fq"), ">", file.path(outputfolder, "out", "findgenomicpos.fq.sai"), "2>>", file.path(outputfolder, "out", ".ericscript.log")))
+system(paste("bwa samse", "-n 50", genomeref, file.path(outputfolder, "out", "findgenomicpos.fq.sai"), file.path(outputfolder, "out", "findgenomicpos.fq"), ">", file.path(outputfolder, "out", "findgenomicpos.fq.tmp"), "2>>", file.path(outputfolder, "out", ".ericscript.log")))
+system(paste("cat", file.path(outputfolder, "out", "findgenomicpos.fq.tmp"), "|", file.path(ericscriptfolder, "lib", "perl", "xa2multi.pl"), "-", "|","grep -v -e \'^\\@\' -",">", file.path(outputfolder, "out", "findgenomicpos.fq.sam")))
+xx.pos <- read.delim(file.path(outputfolder, "out", "findgenomicpos.fq.sam"), sep = "\t", header= F)
+genpos_1 <- rep(0,length(ensgenename1))
+genpos_2 <- rep(0,length(ensgenename1))
+id.pos <- as.character(xx.pos[[1]])
+flag.pos <- as.character(xx.pos[[2]])
+chr.pos <- as.character(xx.pos[[3]])
+if (length(grep("chr", chr.pos)) > 0) {
+  chr.pos <- gsub("chr", "", chr.pos)
+}
+pos.pos <- as.numeric(as.character(xx.pos[[4]]))
+mapq.pos <- as.numeric(as.character(xx.pos[[5]]))
+
+for (i in 1: length(ensgenename1)) {
+  
+  ## for 5' gene
+  ix.mypos <- which(id.pos == paste(i, "_1", sep = ""))
+  chr.pos.ix <- chr.pos[ix.mypos]
+  flag.pos.ix <- flag.pos[ix.mypos]
+  pos.pos.ix <- pos.pos[ix.mypos]
+  mapq.pos.ix <- mapq.pos[ix.mypos] 
+  ix.okpos <- which(chr.pos.ix == chr1[i] & pos.pos.ix >= as.numeric(genestart1[i]) & pos.pos.ix <= as.numeric(geneend1[i]))
+  if (length(ix.okpos) > 1) {
+    ix.okpos <- ix.okpos[which.max(mapq.pos.ix[ix.okpos])]
+  }
+  if (length(ix.okpos) > 0) {
+    if (flag.pos.ix[ix.okpos] == 16) {
+      genpos_1[i] <- pos.pos.ix[ix.okpos] 
+    } else {
+      genpos_1[i] <- pos.pos.ix[ix.okpos] + 49
+    }
+  }
+  ## for 3' gene
+  ix.mypos <- which(id.pos == paste(i, "_2", sep = ""))
+  chr.pos.ix <- chr.pos[ix.mypos]
+  flag.pos.ix <- flag.pos[ix.mypos]
+  pos.pos.ix <- pos.pos[ix.mypos]
+  mapq.pos.ix <- mapq.pos[ix.mypos] 
+  ix.okpos <- which(chr.pos.ix == chr2[i] & pos.pos.ix >= as.numeric(genestart2[i]) & pos.pos.ix <= as.numeric(geneend2[i]))
+  if (length(ix.okpos) > 1) {
+    ix.okpos <- ix.okpos[which.max(mapq.pos.ix[ix.okpos])]
+  }
+  if (length(ix.okpos) > 0) {
+    if (flag.pos.ix[ix.okpos] == 16) {
+      genpos_2[i] <- pos.pos.ix[ix.okpos] + 49
+    } else {
+      genpos_2[i] <- pos.pos.ix[ix.okpos] 
+    }
+  }
+}
+
+# NEW Find GenomicPosition (25nt_1)
+ix.na.pos_1 <- which(genpos_1 == 0)
+ix.na.pos_2 <- which(genpos_2 == 0)
+if (length(ix.na.pos_1 ) > 0 | length(ix.na.pos_2 ) > 0) {
+  
+left_junction.trim <- substr(left_junction, 26, 50)
+right_junction.trim <- substr(right_junction, 1, 25)
+for (i in 1: length(ensgenename1)) {
+  if (i == 1) {
+    cat(paste("@", i, "_", 1, "\n", left_junction.trim[i], "\n+\n", gsub(", ", "", toString(rep("I", nchar(left_junction.trim[i])))), "\n", "@", i, "_", 2, "\n", right_junction.trim[i],"\n+\n", gsub(", ", "", toString(rep("I", nchar(right_junction.trim[i])))), sep = ""), sep = "\n", file = file.path(outputfolder, "out", "findgenomicpos.fq"), append = F)    
+  } else {
+    cat(paste("@", i, "_", 1, "\n", left_junction.trim[i], "\n+\n", gsub(", ", "", toString(rep("I", nchar(left_junction.trim[i])))), "\n", "@", i, "_", 2, "\n", right_junction.trim[i],"\n+\n", gsub(", ", "", toString(rep("I", nchar(right_junction.trim[i])))), sep = ""), sep = "\n", file = file.path(outputfolder, "out", "findgenomicpos.fq"), append = T)  
+  }
+}
+system(paste("bwa aln", "-R 50", genomeref, file.path(outputfolder, "out", "findgenomicpos.fq"), ">", file.path(outputfolder, "out", "findgenomicpos.fq.sai"), "2>>", file.path(outputfolder, "out", ".ericscript.log")))
+system(paste("bwa samse", "-n 50", genomeref, file.path(outputfolder, "out", "findgenomicpos.fq.sai"), file.path(outputfolder, "out", "findgenomicpos.fq"), ">", file.path(outputfolder, "out", "findgenomicpos.fq.tmp"), "2>>", file.path(outputfolder, "out", ".ericscript.log")))
+system(paste("cat", file.path(outputfolder, "out", "findgenomicpos.fq.tmp"), "|", file.path(ericscriptfolder, "lib", "perl", "xa2multi.pl"), "-", "|","grep -v -e \'^\\@\' -",">", file.path(outputfolder, "out", "findgenomicpos.fq.sam")))
+xx.pos <- read.delim(file.path(outputfolder, "out", "findgenomicpos.fq.sam"), sep = "\t", header= F)
+id.pos <- as.character(xx.pos[[1]])
+flag.pos <- as.character(xx.pos[[2]])
+chr.pos <- as.character(xx.pos[[3]])
+if (length(grep("chr", chr.pos)) > 0) {
+  chr.pos <- gsub("chr", "", chr.pos)
+}
+pos.pos <- as.numeric(as.character(xx.pos[[4]]))
+mapq.pos <- as.numeric(as.character(xx.pos[[5]]))
+for (i in 1: length(ensgenename1)) {
+  if (i %in% ix.na.pos_1) {
+    ## for 5' gene
+    ix.mypos <- which(id.pos == paste(i, "_1", sep = ""))
+    chr.pos.ix <- chr.pos[ix.mypos]
+    flag.pos.ix <- flag.pos[ix.mypos]
+    pos.pos.ix <- pos.pos[ix.mypos]
+    mapq.pos.ix <- mapq.pos[ix.mypos] 
+    ix.okpos <- which(chr.pos.ix == chr1[i] & pos.pos.ix >= as.numeric(genestart1[i]) & pos.pos.ix <= as.numeric(geneend1[i]))
+    if (length(ix.okpos) > 1) {
+      ix.okpos <- ix.okpos[which.max(mapq.pos.ix[ix.okpos])]
+    }
+    if (length(ix.okpos) > 0) {
+      if (flag.pos.ix[ix.okpos] == 16) {
+        genpos_1[i] <- pos.pos.ix[ix.okpos] 
+      } else {
+        genpos_1[i] <- pos.pos.ix[ix.okpos] + 24
+      }
+    }
+  }
+  ## for 3' gene
+  if (i %in% ix.na.pos_2) {
+    ix.mypos <- which(id.pos == paste(i, "_2", sep = ""))
+    chr.pos.ix <- chr.pos[ix.mypos]
+    flag.pos.ix <- flag.pos[ix.mypos]
+    pos.pos.ix <- pos.pos[ix.mypos]
+    mapq.pos.ix <- mapq.pos[ix.mypos] 
+    ix.okpos <- which(chr.pos.ix == chr2[i] & pos.pos.ix >= as.numeric(genestart2[i]) & pos.pos.ix <= as.numeric(geneend2[i]))
+    if (length(ix.okpos) > 1) {
+      ix.okpos <- ix.okpos[which.max(mapq.pos.ix[ix.okpos])]
+    }
+    if (length(ix.okpos) > 0) {
+      if (flag.pos.ix[ix.okpos] == 16) {
+        genpos_2[i] <- pos.pos.ix[ix.okpos] + 24
+      } else {
+        genpos_2[i] <- pos.pos.ix[ix.okpos] 
+      }
+    }
+  }
+}
+# NEW Find GenomicPosition (25nt_2)
+ix.na.pos_1 <- which(genpos_1 == 0)
+ix.na.pos_2 <- which(genpos_2 == 0)
+left_junction.trim <- substr(left_junction, 1, 25)
+right_junction.trim <- substr(right_junction, 26, 50)
+for (i in 1: length(ensgenename1)) {
+  if (i == 1) {
+    cat(paste("@", i, "_", 1, "\n", left_junction.trim[i], "\n+\n", gsub(", ", "", toString(rep("I", nchar(left_junction.trim[i])))), "\n", "@", i, "_", 2, "\n", right_junction.trim[i],"\n+\n", gsub(", ", "", toString(rep("I", nchar(right_junction.trim[i])))), sep = ""), sep = "\n", file = file.path(outputfolder, "out", "findgenomicpos.fq"), append = F)    
+  } else {
+    cat(paste("@", i, "_", 1, "\n", left_junction.trim[i], "\n+\n", gsub(", ", "", toString(rep("I", nchar(left_junction.trim[i])))), "\n", "@", i, "_", 2, "\n", right_junction.trim[i],"\n+\n", gsub(", ", "", toString(rep("I", nchar(right_junction.trim[i])))), sep = ""), sep = "\n", file = file.path(outputfolder, "out", "findgenomicpos.fq"), append = T)  
+  }
+}
+system(paste("bwa aln", "-R 50", genomeref, file.path(outputfolder, "out", "findgenomicpos.fq"), ">", file.path(outputfolder, "out", "findgenomicpos.fq.sai"), "2>>", file.path(outputfolder, "out", ".ericscript.log")))
+system(paste("bwa samse", "-n 50", genomeref, file.path(outputfolder, "out", "findgenomicpos.fq.sai"), file.path(outputfolder, "out", "findgenomicpos.fq"), ">", file.path(outputfolder, "out", "findgenomicpos.fq.tmp"), "2>>", file.path(outputfolder, "out", ".ericscript.log")))
+system(paste("cat", file.path(outputfolder, "out", "findgenomicpos.fq.tmp"), "|", file.path(ericscriptfolder, "lib", "perl", "xa2multi.pl"), "-", "|","grep -v -e \'^\\@\' -",">", file.path(outputfolder, "out", "findgenomicpos.fq.sam")))
+
+
+xx.pos <- read.delim(file.path(outputfolder, "out", "findgenomicpos.fq.sam"), sep = "\t", header= F)
+id.pos <- as.character(xx.pos[[1]])
+flag.pos <- as.character(xx.pos[[2]])
+chr.pos <- as.character(xx.pos[[3]])
+if (length(grep("chr", chr.pos)) > 0) {
+  chr.pos <- gsub("chr", "", chr.pos)
+}
+pos.pos <- as.numeric(as.character(xx.pos[[4]]))
+mapq.pos <- as.numeric(as.character(xx.pos[[5]]))
+for (i in 1: length(ensgenename1)) {
+  if (i %in% ix.na.pos_1) {
+    ## for 5' gene
+    ix.mypos <- which(id.pos == paste(i, "_1", sep = ""))
+    chr.pos.ix <- chr.pos[ix.mypos]
+    flag.pos.ix <- flag.pos[ix.mypos]
+    pos.pos.ix <- pos.pos[ix.mypos]
+    mapq.pos.ix <- mapq.pos[ix.mypos] 
+    ix.okpos <- which(chr.pos.ix == chr1[i] & pos.pos.ix >= as.numeric(genestart1[i]) & pos.pos.ix <= as.numeric(geneend1[i]))
+    if (length(ix.okpos) > 1) {
+      ix.okpos <- ix.okpos[which.max(mapq.pos.ix[ix.okpos])]
+    }
+    if (length(ix.okpos) > 0) {
+      if (flag.pos.ix[ix.okpos] == 16) {
+        genpos_1[i] <- pos.pos.ix[ix.okpos] - 1
+      } else {
+        genpos_1[i] <- pos.pos.ix[ix.okpos] + 24
+      }
+    }
+  }
+  ## for 3' gene
+  if (i %in% ix.na.pos_2) {
+    ix.mypos <- which(id.pos == paste(i, "_2", sep = ""))
+    chr.pos.ix <- chr.pos[ix.mypos]
+    flag.pos.ix <- flag.pos[ix.mypos]
+    pos.pos.ix <- pos.pos[ix.mypos]
+    mapq.pos.ix <- mapq.pos[ix.mypos] 
+    ix.okpos <- which(chr.pos.ix == chr2[i] & pos.pos.ix >= as.numeric(genestart2[i]) & pos.pos.ix <= as.numeric(geneend2[i]))
+    if (length(ix.okpos) > 1) {
+      ix.okpos <- ix.okpos[which.max(mapq.pos.ix[ix.okpos])]
+    }
+    if (length(ix.okpos) > 0) {
+      if (flag.pos.ix[ix.okpos] == 16) {
+        genpos_2[i] <- pos.pos.ix[ix.okpos] + 24
+      } else {
+        genpos_2[i] <- pos.pos.ix[ix.okpos] - 1 
+      }
+    }
+  }
+}
+}
+
+genpos_1.recal <- genpos_1
+genpos_2.recal <- genpos_2
+
+# # refine genomic coordinates
+# for (i in 1: length(ensgenename1)) {
+#   ix.ref <- grep(ensgenename1[i], GeneNames)
+#   ix.ref.table <- which(GeneNames[ix.ref] == geneinfo.id)
+#   if (strand1[i] == "+") {
+#     exonpos <- as.numeric(unlist(strsplit(as.character(EnsemblGene.Structures$exonEnd[ix.ref.table]), ",")))
+#   } else {
+#     exonpos <- as.numeric(unlist(strsplit(as.character(EnsemblGene.Structures$exonStart[ix.ref.table]), ",")))
+#   }
+#   ix.exon <- which.min(abs(genpos_1[i] - exonpos))
+#   mydiff <- abs(genpos_1[i] - exonpos[ix.exon])
+#   if (mydiff <= 3) {
+#     genpos_1.recal[i] <- exonpos[ix.exon]
+#   }
+#   
+#   ix.ref <- grep(ensgenename2[i], GeneNames)
+#   ix.ref.table <- which(GeneNames[ix.ref] == geneinfo.id)
+#   if (strand2[i] == "+") {
+#     exonpos <- as.numeric(unlist(strsplit(as.character(EnsemblGene.Structures$exonStart[ix.ref.table]), ",")))
+#   } else {
+#     exonpos <- as.numeric(unlist(strsplit(as.character(EnsemblGene.Structures$exonEnd[ix.ref.table]), ",")))
+#   }
+#   ix.exon <- which.min(abs(genpos_2[i] - exonpos))
+#   mydiff <- abs(genpos_2[i] - exonpos[ix.exon])
+#   if (mydiff <= 3) {
+#     genpos_2.recal[i] <- exonpos[ix.exon]
+#   }
+# }
+
+
+nreads.score <- pmin( n.crossing, n.spanning)/pmax(n.crossing, n.spanning)
+myscores <- cbind(gjs.score, edge.score, nreads.score, gene12.exp)
+colnames(myscores) <- c("probs.gjs", "probs.es", "probs.us", "cov")
+myscores <- data.frame(myscores)
+if (flag.ada) {
+  myada <- ada(control~., data = DataScores,loss="exponential", nu = 0.1)
+  ericscore <- as.numeric(predict(myada, myscores, type = "probs")[,2])
+} else {
+  sig <- sigest(control~., data = DataScores, frac = 1, na.action = na.omit, scaled = TRUE)[2]
+  model <- ksvm(control~., data = DataScores, type = "C-svc", kernel = "rbfdot", kpar = list(sigma = sig), C = 1, prob.model = TRUE)
+  ericscore <- predict(model, myscores, type = "probabilities")[,2]  
+}
+ix.repeated <- unique(c(which(is.na(genpos_1)), which(is.na(genpos_2))))
+ix.norepeated <- intersect(which(is.na(genpos_1) == F), which(is.na(genpos_2) == F))
+genpos_1.recal[which(genpos_1.recal == 0)] <- "Unable to predict breakpoint position"
+genpos_2.recal[which(genpos_2.recal == 0)] <- "Unable to predict breakpoint position"
+myblacklist <- rep("", length(genename1))
+ix.bl <- which((genename1 %in% gene.bl1 & genename2 %in% gene.bl2) | (genename1 %in% gene.bl2 & genename2 %in% gene.bl1))
+if (length(ix.bl) > 0) {
+  for (bli in 1: length(ix.bl)) {
+    ix.bli <- which((gene.bl1 == genename1[ix.bl[bli]] & gene.bl2 == genename2[ix.bl[bli]]) | (gene.bl2 == genename1[ix.bl[bli]] & gene.bl1 == genename2[ix.bl[bli]]))
+    myblacklist[ix.bl[bli]] <- paste("Frequency:", sum(freq.bl[ix.bli]))
+  }
+}
+oddity.spanningreads <- rep(0, length(genpos_1))
+oddity.spanningreads[which(n.spanning == 1 & n.crossing >= 10)] <- 1    
+fusion.type <- rep("inter-chromosomal",length(genpos_1))
+fusion.type[which(chr1==chr2)] <- "intra-chromosomal"
+fusion.type[intersect(intersect(which(chr1==chr2) , which(adj == "Adjacent")), which(strand1 == strand2))] <- "Read-Through"
+fusion.type[intersect(intersect(which(chr1==chr2) , which(adj == "Adjacent")), which(strand1 != strand2))] <- "Cis"
+junctionsequence <- paste(tolower(left_junction), right_junction, sep = "")
+SummaryMat <- cbind(genename1, genename2, chr1, genpos_1.recal, strand1, chr2, genpos_2.recal , strand2, ensgenename1, ensgenename2, n.crossing, n.spanning,ins.size, homology, fusion.type, myblacklist,description1, description2, junctionsequence, gene1.exp, gene2.exp, gene12.exp, edge.score, gjs.score, nreads.score, ericscore)
+colnames(SummaryMat) <- c("GeneName1", "GeneName2", "chr1","Breakpoint1", "strand1", "chr2" ,"Breakpoint2", "strand2","EnsemblGene1", "EnsemblGene2", "crossingreads","spanningreads","mean insertsize","homology","fusiontype", "Blacklist","InfoGene1", "InfoGene2", "JunctionSequence", "GeneExpr1", "GeneExpr2", "GeneExpr_Fused", "ES", "GJS", "US","EricScore")
+SummaryMat <- data.frame(SummaryMat)
+save(SummaryMat, file=file.path(outputfolder, paste(samplename, ".Summary.RData", sep = "")))
+if (dim(SummaryMat)[1] > 0) {
+  write.table(SummaryMat, file = file.path(outputfolder,paste(samplename,".results.total.tsv", sep = "")), sep = "\t", row.names = F, quote = F)
+  ix.sorting.score <- sort(ericscore, decreasing = T, index.return = T)$ix
+  ericscore.sorted <- ericscore[ix.sorting.score]
+  myblacklist.sorted <- myblacklist[ix.sorting.score]
+  oddity.spanningreads.sorted <- oddity.spanningreads[ix.sorting.score]
+  SummaryMat.sorted <- SummaryMat[ix.sorting.score, ]
+  SummaryMat.Filtered <-  SummaryMat.sorted[which(ericscore.sorted > 0.5 & myblacklist.sorted == "" & oddity.spanningreads.sorted == 0), ]
+  write.table(SummaryMat.Filtered[, -16], file = file.path(outputfolder,paste(samplename,".results.filtered.tsv", sep = "")), sep = "\t", row.names = F, quote = F)
+} else
+{
+  Results <- "No Chimeric Transcript found!"
+  write.table(Results, file = file.path(outputfolder,paste(samplename,".results.total.tsv", sep = "")), sep = "\t", row.names = F, col.names = F, quote = F)
+  
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/lib/R/RecalibrateJunctions.R b/lib/R/RecalibrateJunctions.R
new file mode 100644
index 0000000..cdf2896
--- /dev/null
+++ b/lib/R/RecalibrateJunctions.R
@@ -0,0 +1,141 @@
+vars.tmp <- commandArgs()
+vars <- vars.tmp[length(vars.tmp)]
+split.vars <- unlist(strsplit(vars, ","))
+samplename <- split.vars [1]
+outputfolder <- split.vars[2]
+readlength <-  as.numeric(split.vars[3])
+verbose <- as.numeric(split.vars[4])
+grep.readlength <- c("grep")
+for (i in 1: length(readlength)) {
+	grep.readlength <- paste(grep.readlength, " -v -e MD:Z:", readlength[i], sep = "")
+}
+
+formatfasta <- function(myfasta, step = 50) {	
+	totalchar <- nchar(myfasta)
+	if (totalchar > step) {
+		steps <- seq(1, totalchar, by = step)
+		newfasta <- rep("", (length(steps) - 1))
+		for (j in 1: (length(steps) - 1)) {
+			aa <- substr(myfasta, steps[j], (steps[j] + (step - 1)))
+			newfasta[j] <- aa 
+		}
+		if ((totalchar - tail(steps, n = 1)) > 0) {
+			newfasta <-  c(newfasta, substr(myfasta, steps[j+1], totalchar))
+		}
+	} else
+	{
+		newfasta <- substr(myfasta, 1, totalchar)
+	}
+	return(newfasta)
+}
+
+TryRecalibration <- function(outputfolder, verbose) {
+    
+	if (verbose == 0) {
+		x <- system(paste("blat -tileSize=8 -fine", file.path(outputfolder,"out",".tmp.ref.fa"), file.path(outputfolder, "out", ".link"), file.path(outputfolder, "out",".recalibrated.junctions.blat"), " 1>> ", file.path(outputfolder, "out",".ericscript.log")))
+	} else {
+	  	x <- system(paste("blat -tileSize=8 -fine", file.path(outputfolder,"out",".tmp.ref.fa"), file.path(outputfolder, "out", ".link"), file.path(outputfolder, "out",".recalibrated.junctions.blat")))
+	}
+	yy <- readLines(file.path(outputfolder, "out", ".recalibrated.junctions.blat"), n = 6)
+	if (length(yy) > 5) {
+		xx <- read.delim(file.path(outputfolder, "out", ".recalibrated.junctions.blat"), sep = "\t", skip = 5, header = F)
+		gapsize <- xx[,8]
+	}
+	if (all(gapsize <= 3) | (length(yy) <= 5)) {
+		if (verbose == 0) {
+			x <- system(paste("blat -tileSize=8", file.path(outputfolder,"out",".tmp.ref.fa"), file.path(outputfolder, "out", ".link"), file.path(outputfolder, "out",".recalibrated.junctions.blat"), " 1>> ", file.path(outputfolder, "out",".ericscript.log")))
+		} else {
+			x <- system(paste("blat -tileSize=8", file.path(outputfolder,"out",".tmp.ref.fa"), file.path(outputfolder, "out", ".link"), file.path(outputfolder, "out",".recalibrated.junctions.blat")))
+		}
+		
+	}
+	
+}
+
+load(file.path(outputfolder,"out",paste(samplename,".junctions.RData", sep = "")))
+load(file.path(outputfolder,"out",paste(samplename,".ids_fasta.RData", sep = "")))
+load(file.path(outputfolder,"out",paste(samplename,".sequences_fasta.RData", sep = "")))
+cat(file.path(outputfolder,"out",".tmp.query.fa"), file = file.path(outputfolder,"out",".link"))
+sequences <- sequences.fasta
+recal.left <- rep(0, length(ids_fasta))
+recal.right <- rep(0, length(ids_fasta))
+count.recal <- rep(0, length(ids_fasta))
+count.total <- rep(0, length(ids_fasta))
+sequences.recal <- sequences
+junctions.recalibrated <- junctions
+for (i in 1:length(ids_fasta)) {
+	junction.tmp <- junctions[i]
+	x <- system(paste("samtools view ", file.path(outputfolder, "aln", paste(samplename,".remap.sorted.bam", sep = ""))," ", ids_fasta[i], " | ", grep.readlength, " | awk '((($5==0) && ($6==\"*\")) || ($5>=0))' | awk '{ print \">\" $1\"_\"$2,\"\\n\"$10}' > ", file.path(outputfolder,"out",".tmp.query.fa"),sep = ""))
+	xx <- readLines(file.path(outputfolder,"out",".tmp.query.fa"), n = 1)
+	if (length(xx) != 0) {
+		x <- cat(paste(">",ids_fasta[i],sep=""), sequences[i],sep = "\n", file = file.path(outputfolder,"out",".tmp.ref.fa"))
+		try.recal <- TryRecalibration(outputfolder, verbose)
+		rm(xx)
+		yy <- readLines(file.path(outputfolder, "out", ".recalibrated.junctions.blat"), n = 6)
+		if (length(yy) > 5) {
+			xx <- read.delim(file.path(outputfolder, "out", ".recalibrated.junctions.blat"), sep = "\t", skip = 5, header = F)
+			gapsize <- xx[,8]
+			gapstarts <- strsplit(as.character(xx[,21]), ",")
+			blocksize <- strsplit(as.character(xx[,19]), ",")
+			if (any(gapsize > 3)) {
+				ix.0 <- which(gapsize > 3)
+				if (length(ix.0) > 0) {
+					gapstarts1 <- gapstarts[ix.0]
+					gapstarts.tmp1 <- c()
+					gapstarts.tmp2 <- c()
+					for (jgap in 1:length(gapstarts1)) {
+						ccc <- as.numeric(gapstarts1[[jgap]])
+						gapstarts.tmp1 <- c(gapstarts.tmp1, ccc[1])
+						gapstarts.tmp2 <- c(gapstarts.tmp2, ccc[2])
+					}
+					ix.gap.in.junct <- ix.0[which((gapstarts.tmp1 <= junction.tmp) & (gapstarts.tmp2 >= (junction.tmp - 10 + 1)))]
+					gaps <- gapsize[ix.gap.in.junct]
+					unique.gaps <- unique(gaps)
+					rr <- tabulate(gaps)
+					max.rr <- max(rr)
+					if( max.rr >= 1) {
+						gap.length <- which.max(rr)
+						ix.gaps <- which(gapsize == gap.length) 
+						a <- rep(0, length(ix.gaps))
+						b <- rep(0, length(ix.gaps))
+						aa <- rep(0, length(ix.gaps))
+						for (jj in 1:length(ix.gaps)) {
+							gapstarts.tmp <- as.numeric(gapstarts[[ix.gaps[jj]]])
+							blocksize.tmp <- as.numeric(blocksize[[ix.gaps[jj]]])
+							a[jj] <- gapstarts.tmp[1] + blocksize.tmp[1] - 1
+							b[jj] <- gapstarts.tmp[2]
+							aa[jj] <- gapstarts.tmp[1]
+						}
+						max.a <- max(tabulate(a))
+						max.b <- max(tabulate(b))
+						my.a <- which.max(tabulate(a))
+						my.b <- which.max(tabulate(b))
+						if ((abs(max.a-max.b)/max.a) < 0.31) {
+							count.total[i] <- length(gaps)
+							count.recal[i] <- max.rr
+							recal.left[i] <- my.a + 1
+							recal.right[i] <- my.b + 1 
+							sequences.recal[i] <- paste(substr(sequences[i], 1, recal.left[i]), substr(sequences[i], recal.right[i], nchar(sequences[i])), sep = "")
+							junctions.recalibrated[i] <- recal.left[i]
+						}
+					}
+					
+				}
+			}
+		}
+		
+	} 
+}
+ids_fasta.recalibrated <- paste(">", ids_fasta, " junction@", junctions.recalibrated, sep = "")
+ref.recalibrated <- c()
+for (i in 1:length(ids_fasta.recalibrated)) {
+	ref.recalibrated <- c(ref.recalibrated, c(ids_fasta.recalibrated[i], formatfasta(sequences.recal[i])))
+}
+write(ref.recalibrated, file = file.path(outputfolder,"out",paste(samplename,".EricScript.junctions.recalibrated.fa", sep = "")), ncolumns = 1, sep = "")
+Recalibrated.Data <- cbind(recal.left, recal.right, junctions, count.recal, count.total)
+colnames(Recalibrated.Data) <- c("Left_Junction", "Right_Junction", "Junction", "Recal_Count", "Total_Count")
+save(sequences.recal, file = file.path(outputfolder,"out",paste(samplename,".sequences.recalibrated.RData", sep = "")))
+save(Recalibrated.Data, file = file.path(outputfolder,"out",paste(samplename,".Recalibrated.Data.RData", sep = "")))
+save(junctions.recalibrated, file = file.path(outputfolder,"out",paste(samplename,".junctions.recalibrated.RData", sep = "")))
+
+
diff --git a/lib/R/RetrieveRefId.R b/lib/R/RetrieveRefId.R
new file mode 100644
index 0000000..996ac8c
--- /dev/null
+++ b/lib/R/RetrieveRefId.R
@@ -0,0 +1,86 @@
+### retrieverefid ver 3: changed ensembl ftp filesystem ver 2
+### added user selectable ensembl version
+vars.tmp <- commandArgs()
+split.vars <- unlist(strsplit(vars.tmp[length(vars.tmp)], ","));
+ericscriptfolder <- split.vars[1];
+dbfolder <-  split.vars[2];
+ensversion <- as.integer(split.vars[3]);
+flagprint <- split.vars[4];
+if(ensversion < 0) ensversion <- 0;
+
+resdn <- file.path(dbfolder,"data","_resources");
+rfn <- file.path(resdn,"RefID.RData");
+if(!file.exists(resdn)) dir.create(resdn,recursive=T);
+ctime <- as.integer(Sys.time());
+if(file.exists(rfn)) {
+	dtime <- as.integer(file.info(rfn)$mtime);
+}
+if(!exists("dtime") || is.na(dtime)) dtime <- 0;
+
+# update if necessary
+if(dtime > ctime-10000 && ensversion==0) {
+	z <- new.env();
+	load(rfn,envir=z);
+	ensversion		<- z$version;
+	ensrefid		<- z$refid;
+	ensrefid.path	<- z$refid.path;
+	rm(z);
+	ensversion0 <- ensversion;
+} else {
+# get ensembl version and fasta lists
+cat(xx0.tmp<-readLines("http://ftp.ensembl.org/pub"),file=paste(resdn,"/.ftplist0h",sep=""));
+# get the most recent version
+ensversion0 <- ensversion
+if (ensversion == 0) {
+	i1 <- grep(">release-[0-9]*/</a>",xx0.tmp);
+	ensversion <- max(as.integer(sub(".*>release-([0-9]*)/</a>.*","\\1",xx0.tmp[i1])));
+}
+
+# get available species
+cat(xx1.tmp<-readLines((bfn<-paste("http://ftp.ensembl.org/pub/",ifelse(ensversion>0,paste("release-",ensversion,"/fasta",sep=""),"/current_fasta"),sep=""))),file=paste(resdn,"/.ftplist1h",sep=""),sep="\n");
+# collect names of all available species 
+i1 <- grep("<a href=\"[a-z]",xx1.tmp); 
+xx <- sub(".*<a href[^>]*>([^<]*)/</a>.*","\\1",xx1.tmp[i1]);
+	ensrefid <- xx;
+	ensrefid.path <- c();
+	for(i in 1:length(xx)) tryCatch({
+		xx2.tmp <- readLines(paste(bfn,"/",xx[i],"/dna",sep=""));
+		a1 <- grep("dna.toplevel",xx2.tmp,value=T);
+		if(length(a1)==1) ensrefid.path[i] <- sub(".*<a href=\"([^>]*)\">[^<]*</a>.*","\\1",a1);
+	},error=function(e){});
+	i1 <- which(!is.na(ensrefid.path) & nchar(ensrefid.path)>10);
+	ensrefid <- ensrefid[i1];
+	ensrefid.path <- ensrefid.path[i1];
+
+};	# end if dtime & ctime comparison
+
+if(flagprint != 0) {
+	cat(ifelse(ensversion0!=0,"Selected","Current"),"Ensembl version:", ensversion, "\n");
+
+	if(file.exists(rfn) & any(file.exists(file.path(dbfolder,"data",ensrefid)))) {
+		load(rfn);
+		cat("Installed Ensembl version:", version, "\n")
+	} else {
+		cat("Installed Ensembl version:", "No database installed", "\n")
+	}
+	cat("Available reference IDs:\n", paste("\t", ensrefid, "\n"));
+}
+flag.updatedb <- 1;
+if(file.exists(rfn)) {
+	load(rfn);
+	if(ensversion != version) {
+		flag.updatedb <- 0;
+	}
+} 
+if(flag.updatedb>0) {
+	refid <- ensrefid
+	refid.path <- ensrefid.path
+	version <- ensversion
+}
+
+save(refid, refid.path, version, file = rfn);
+cat(version, file = file.path(resdn, "Ensembl.version"));
+cat(flag.updatedb, file = file.path(resdn, ".flag.updatedb"));
+
+
+
diff --git a/lib/R/SimulateFusions.R b/lib/R/SimulateFusions.R
new file mode 100644
index 0000000..385614e
--- /dev/null
+++ b/lib/R/SimulateFusions.R
@@ -0,0 +1,321 @@
+### simulate data [revised].
+
+vars.tmp <- commandArgs()
+vars <- vars.tmp[length(vars.tmp)]
+split.vars <- unlist(strsplit(vars, ","))
+
+readlength <- as.numeric(split.vars[1])
+outputfolder <- split.vars[2]
+ericscriptfolder <- split.vars[3]
+verbose <- as.numeric(split.vars[4])
+ins.size <- as.numeric(split.vars[5])
+sd.inssize <- as.numeric(split.vars[6])
+ngenefusion <- as.numeric(split.vars[7])
+min.coverage <- as.numeric(split.vars[8])
+max.coverage <- as.numeric(split.vars[9])
+nsims <- as.numeric(split.vars[10])
+BE.data <- as.numeric(split.vars[11])
+IE.data <- as.numeric(split.vars[12])
+background.data_1 <- as.character(split.vars[13])
+background.data_2 <- as.character(split.vars[14])
+nreads.background <- as.numeric(split.vars[15])
+dbfolder <- as.character(split.vars[16])
+refid <- as.character(split.vars[17])
+  
+mysyndata <- file.exists(file.path(dbfolder, "data", refid, "EnsemblGene.Transcripts.RData"))
+if (mysyndata == T) {
+  cat("[EricScript simulator] Load genes data ...")
+  load(file.path(dbfolder, "data", refid, "EnsemblGene.Transcripts.RData"))
+} else {
+  cat( paste("[EricScript simulator] You need to download", refid, "data before running EricScript Simulator. Exit.\n"))
+  system(paste("rm -r", outputfolder))
+  quit()
+}
+
+# myurl <- "http://dl.dropbox.com/u/3629305/EnsemblGene.Transcripts.RData"
+# if (mysyndata == T) {
+# 	cat("[EricScript simulator] Load genes data ...")
+# 	load(file.path(dbfolder, "data", "EnsemblGene.Transcripts.RData"))
+# } else {
+# 	cat("[EricScript simulator] Retrieving genes data ...")
+# 	download.file(myurl, destfile = file.path(dbfolder, "data", "EnsemblGene.Transcripts.RData"), quiet = T)
+# 	load(file.path(dbfolder, "data", "EnsemblGene.Transcripts.RData"))
+# 	cat(" done.\n")
+# 	cat("[EricScript simulator] Load genes data ...")
+# 	
+# }
+
+flag.background <- 0
+if (nchar(background.data_1) > 2 & nchar(background.data_2) > 2) {
+	flag.background <- 1
+}
+
+dataset <- c()
+if (BE.data == 1) {
+	dataset <- c(dataset, "BE")
+}
+if (IE.data == 1) {
+	dataset <- c(dataset, "IE")
+}
+formatfasta <- function(myfasta, step = 50) {
+	
+	totalchar <- nchar(myfasta)
+	if (totalchar > step) {
+		steps <- seq(1, totalchar, by = step)
+		newfasta <- rep("", (length(steps) - 1))
+		for (j in 1: (length(steps) - 1)) {
+			aa <- substr(myfasta, steps[j], (steps[j] + (step - 1)))
+			newfasta[j] <- aa 
+		}
+		if ((totalchar - tail(steps, n = 1)) > 0) {
+			newfasta <-  c(newfasta, substr(myfasta, steps[j+1], totalchar))
+		}
+	} else
+	{
+		newfasta <- substr(myfasta, 1, totalchar)
+	}
+	return(newfasta)
+}
+
+## evaluate n.backgound reads
+
+
+TranscriptNames <- as.character(EnsemblGene.Structures$EnsemblGene)
+acceptable.chrs <-  c(seq(1,22), "X", "Y")
+mycoverage <- seq(min.coverage, max.coverage, length.out = ngenefusion)
+minlength <- ins.size + 2*sd.inssize
+if (refid == "homo_sapiens") {
+  ix.geneok <- which((EnsemblGene.Structures$Chromosome %in% acceptable.chrs))
+} else {
+  ix.geneok <- seq(1, length(EnsemblGene.Structures$Chromosome))
+}
+genenameok <- as.character(EnsemblGene.Structures$EnsemblGene)[ix.geneok]
+strandok <- as.character(EnsemblGene.Structures$Strand)[ix.geneok]
+ix.goodseq <- which((nchar(sequences) > 2*minlength) & (TranscriptNames %in% genenameok) & is.na(GeneNames) == F)
+sequences <- sequences[ix.goodseq]
+GeneNames <- GeneNames[ix.goodseq]
+TranscriptNames <- TranscriptNames[ix.goodseq] 
+
+
+formatted.count.tmp <-paste("00000", seq(1, nsims), sep = "")
+formatted.count <- substr(formatted.count.tmp, nchar(formatted.count.tmp) - 4, nchar(formatted.count.tmp))
+formatted.count.tmp <-paste("00000", seq(1, ngenefusion), sep = "")
+formatted.count.fusions <- substr(formatted.count.tmp, nchar(formatted.count.tmp) - 4, nchar(formatted.count.tmp))
+
+
+for (tt in 1: length(dataset)) {
+    dir.create(file.path(outputfolder, dataset[tt]))  
+	dir.create(file.path(outputfolder, dataset[tt], "data"))  
+	dir.create(file.path(outputfolder, dataset[tt], "reads"))  
+    
+    for (jj in 1: nsims) {
+		
+		dir.create(file.path(outputfolder, dataset[tt], "data", paste("sim", formatted.count[jj], sep = "_"))) 
+		dir.create(file.path(outputfolder, dataset[tt], "reads", paste("sim", formatted.count[jj], sep = "_"))) 
+	}
+}
+cat(" done.\n")
+
+for (jj in 1: nsims) {
+	cat("[EricScript simulator] Generating synthetic dataset", formatted.count[jj], "...")
+	
+	if (flag.background == 1) {
+
+    myrandomseed <- round(runif(1, 1, 1000))
+	  system(paste("seqtk sample -s", myrandomseed, " background.data_1 ", nreads.background, " > ", file.path(outputfolder, "background.reads.1.fq") , sep = ""))
+	  system(paste("seqtk sample -s", myrandomseed, " background.data_2 ", nreads.background, " > ", file.path(outputfolder, "background.reads.2.fq") , sep = ""))
+	}
+	
+	ix.gene1 <- rep(0,ngenefusion)
+	ix.gene2 <- rep(0,ngenefusion)
+	strand1 <- rep(0,ngenefusion)
+	strand2 <- rep(0,ngenefusion)
+	flag <- 1
+	mycount <- 0
+	while (flag == 1) {
+		trans1 <- sample(TranscriptNames, ngenefusion)
+		for (ii in 1: ngenefusion) {
+			ix.gene1[ii] <- which(TranscriptNames == trans1[ii])
+			strand1[ii] <- strandok[which(genenameok == trans1[ii])]
+		}
+		gene1 <- GeneNames[ix.gene1]
+		trans2 <- sample(TranscriptNames, ngenefusion)
+		for (ii in 1: ngenefusion) {
+			ix.gene2[ii] <- which(TranscriptNames == trans2[ii])
+			strand2[ii] <- strandok[which(genenameok == trans2[ii])]
+		}
+		gene2 <- GeneNames[ix.gene2]
+		ix.gene12 <- c(ix.gene1, ix.gene2)
+		if (length(unique(ix.gene12)) == 2*ngenefusion & length(unique(GeneNames[ix.gene12])) == 2*ngenefusion) {
+			flag <- 0
+		} else
+		{flag <- 1}
+	}
+	
+	sequence1 <- sequences[ix.gene1]
+	sequence2 <- sequences[ix.gene2]
+	
+	if ("BE" %in% dataset) {
+		
+		myref <- c()
+		junction1.tot <- c()
+		junction2.tot <- c()
+		id.fusions <- rep(0, length(sequence1))
+		sequence.fusions <- rep(0, length(sequence1))
+		sequence.fusions.50bp <- rep(0, length(sequence1))
+		
+		for (i in 1: length(sequence1)) {
+			
+			tmp <- seq.int(100,(nchar(sequence1[i]) - 100))
+			junction1 <- sample(tmp,1)
+			junction1.tot <- c(junction1.tot, junction1)
+			tmp <- seq.int(100,(nchar(sequence2[i]) - 100))
+			junction2 <- sample(tmp,1)
+			junction2.tot <- c(junction2.tot, junction2)
+			sequence.fusions[i] <- paste(substr(sequence1[i], 1, junction1), substr(sequence2[i], junction2 + 1, nchar(sequence2[i])), sep = "")
+			sequence.fusions.50bp[i] <- paste(substr(sequence1[i], (junction1 - 49), junction1), substr(sequence2[i], junction2 + 1, (junction2 + 50)), sep = "")
+			id.fusions[i] <- paste(">", paste(gene1[i], gene2[i], sep = "----"), sep = "")
+			myref <- c(myref, c(id.fusions[i], sequence.fusions[i]))
+			myref.single <- c(id.fusions[i], formatfasta(sequence.fusions[i]))
+			cat(myref.single, file = file.path(outputfolder, "BE", "data", paste("sim", formatted.count[jj], sep = "_"), paste("myref", formatted.count.fusions[i], ".fa", sep = "")), sep = "\n")
+		}
+		
+		GeneFusions <- list()
+		GeneFusions[[1]] <-  gene1
+		GeneFusions[[2]] <-  gene2
+		GeneFusions[[3]] <- junction1.tot
+		GeneFusions[[4]] <- junction2.tot
+		GeneFusions[[5]] <- sequence.fusions.50bp
+		GeneFusions[[6]] <- mycoverage
+		GeneFusions[[7]] <- trans1
+		GeneFusions[[8]] <- trans2
+		names(GeneFusions) <- c("gene1", "gene2", "junction1", "junction2", "junctionseq", "coverage", "trans1", "trans2")
+		save(GeneFusions, file = file.path(outputfolder, "BE", "data", paste("sim", formatted.count[jj], sep = "_"), "GeneFusions.RData"))
+		
+		system(paste(">", file.path(outputfolder, "BE", "reads", paste("sim", formatted.count[jj], sep = "_"), "fusions.reads.1.fq")))
+		system(paste(">", file.path(outputfolder, "BE", "reads", paste("sim", formatted.count[jj], sep = "_"), "fusions.reads.2.fq")))
+		
+		for (i in 1: ngenefusion) {
+			mynreads <- round(mycoverage[i]*nchar(sequence.fusions[i])/(2*readlength))
+			if (verbose == 0) {
+				system(paste("wgsim -d ", ins.size, " -r 0.0001 -R 0.001 -s ", sd.inssize, " -N ", mynreads, " -1 ", readlength, " -2 ", readlength," ", file.path(outputfolder, "BE", "data", paste("sim", formatted.count[jj], sep = "_"), paste("myref", formatted.count.fusions[i], ".fa", sep = "")), " " ,file.path(outputfolder, "BE", "data", paste("sim", formatted.count[jj], sep = "_"), "out.reads.1.fq")," ", file.path(outputfolder, "BE", "data", paste("sim", formatted.count[jj], sep = "_"), "out.reads.2.fq"), " 2>> ", file.path(outputfolder, "wgsim.log"), " 1>> ", file.path(outputfolder, "wgsim.log"), sep = ""))
+			} else {
+				system(paste("wgsim -d ", ins.size, " -r 0.0001 -R 0.001 -s ", sd.inssize, " -N ", mynreads, " -1 ", readlength, " -2 ", readlength," ", file.path(outputfolder, "BE", "data", paste("sim", formatted.count[jj], sep = "_"), paste("myref", formatted.count.fusions[i], ".fa", sep = "")), " " ,file.path(outputfolder, "BE", "data", paste("sim", formatted.count[jj], sep = "_"), "out.reads.1.fq")," ", file.path(outputfolder, "BE", "data", paste("sim", formatted.count[jj], sep = "_"), "out.reads.2.fq"), sep = ""))
+				
+			}
+			
+			system(paste("cat", file.path(outputfolder, "BE", "data", paste("sim", formatted.count[jj], sep = "_"), "out.reads.1.fq"), ">>",  file.path(outputfolder, "BE", "reads", paste("sim", formatted.count[jj], sep = "_"), "fusions.reads.1.fq")))
+			system(paste("cat", file.path(outputfolder, "BE", "data", paste("sim", formatted.count[jj], sep = "_"), "out.reads.2.fq"), ">>",  file.path(outputfolder, "BE", "reads", paste("sim", formatted.count[jj], sep = "_"), "fusions.reads.2.fq")))
+			
+		}
+		
+		if (flag.background == 1) {
+			system(paste("cat ", file.path(outputfolder, "BE", "reads", paste("sim", formatted.count[jj], sep = "_"), "fusions.reads.1.fq"), " ",  file.path(outputfolder, "background.reads.1.fq"), " > ", file.path(outputfolder, "BE", "reads", paste("sim", formatted.count[jj], sep = "_"), "total.reads.1.fq"), sep = ""))
+			system(paste("cat ", file.path(outputfolder, "BE", "reads", paste("sim", formatted.count[jj], sep = "_"), "fusions.reads.2.fq"), " ",  file.path(outputfolder, "background.reads.2.fq"), " > ", file.path(outputfolder, "BE", "reads", paste("sim", formatted.count[jj], sep = "_"), "total.reads.2.fq"), sep = ""))
+		}
+		
+		system(paste("rm", file.path(outputfolder, "BE", "data", paste("sim", formatted.count[jj], sep = "_"), "out.reads.1.fq"))) 
+		system(paste("rm", file.path(outputfolder, "BE", "data", paste("sim", formatted.count[jj], sep = "_"), "out.reads.2.fq"))) 
+		
+		
+	}
+	
+	if ("IE" %in% dataset) {
+		
+		
+		myref <- c()
+		
+		Gene.Table <- EnsemblGene.Structures
+		junction1.tot <- c()
+		junction2.tot <- c()
+		id.fusions <- rep(0, length(sequence1))
+		sequence.fusions <- rep(0, length(sequence1))
+		sequence.fusions.50bp <- rep(0, length(sequence1))
+		genename.table <- as.character(Gene.Table[,1])
+		
+		
+		for (i in 1: length(sequence1)) {
+			ix.genename.table <- which(genename.table == trans1[i])
+			start.exons <- as.numeric(unlist(strsplit(as.character(Gene.Table[ix.genename.table, 7]), ",")))
+			end.exons <- as.numeric(unlist(strsplit(as.character(Gene.Table[ix.genename.table, 8]), ",")))
+			strand <- as.character(Gene.Table[ix.genename.table, 3])
+			if (strand == "+") {
+				tmp <- cumsum((end.exons - start.exons))
+			} else {
+				tmp <- cumsum(rev(end.exons - start.exons))
+			}
+			if (length(tmp) > 1) {
+				junction1 <- sample(tmp,1)
+			} else {
+				junction1 <- tmp
+			}
+			junction1.tot <- c(junction1.tot, junction1)
+			ix.genename.table <- which(genename.table == trans2[i])
+			start.exons <- as.numeric(unlist(strsplit(as.character(Gene.Table[ix.genename.table, 7]), ",")))
+			end.exons <- as.numeric(unlist(strsplit(as.character(Gene.Table[ix.genename.table, 8]), ",")))
+			strand <- as.character(Gene.Table[ix.genename.table, 3])
+			if (strand == "+") {
+				tmp <- cumsum((end.exons - start.exons)) 
+				tmp <- tmp[-length(tmp)]
+			} else {
+				tmp <- cumsum(rev(end.exons - start.exons)) 
+				tmp <- tmp[-length(tmp)]
+			}
+			if (length(tmp) > 1) {
+				junction2 <- sample(tmp,1)
+			} else {
+				junction2 <- 1
+			}
+			junction2.tot <- c(junction2.tot, junction2)
+			sequence.fusions[i] <- paste(substr(sequence1[i], 1, junction1), substr(sequence2[i], junction2 + 1, nchar(sequence2[i])), sep = "")
+			sequence.fusions.50bp[i] <- paste(substr(sequence1[i], (junction1 - 49), junction1), substr(sequence2[i], junction2 + 1, (junction2 + 50)), sep = "")
+			id.fusions[i] <- paste(">", paste(gene1[i], gene2[i], sep = "----"), sep = "")
+			myref <- c(myref, c(id.fusions[i], sequence.fusions[i]))
+			myref.single <- c(id.fusions[i], formatfasta(sequence.fusions[i]))
+			cat(myref.single, file = file.path(outputfolder, "IE", "data", paste("sim", formatted.count[jj], sep = "_"), paste("myref", formatted.count.fusions[i], ".fa", sep = "")), sep = "\n")
+			
+		}
+		
+		GeneFusions <- list()
+		GeneFusions[[1]] <-  gene1
+		GeneFusions[[2]] <-  gene2
+		GeneFusions[[3]] <- junction1.tot
+		GeneFusions[[4]] <- junction2.tot
+		GeneFusions[[5]] <- sequence.fusions.50bp
+		GeneFusions[[6]] <- mycoverage
+		GeneFusions[[7]] <- trans1
+		GeneFusions[[8]] <- trans2
+		names(GeneFusions) <- c("gene1", "gene2", "junction1", "junction2", "junctionseq", "coverage", "trans1", "trans2")
+		save(GeneFusions, file = file.path(outputfolder, "IE", "data", paste("sim", formatted.count[jj], sep = "_"), "GeneFusions.RData"))
+		
+		
+		system(paste(">", file.path(outputfolder, "IE", "reads", paste("sim", formatted.count[jj], sep = "_"), "fusions.reads.1.fq")))
+		system(paste(">", file.path(outputfolder, "IE", "reads", paste("sim", formatted.count[jj], sep = "_"), "fusions.reads.2.fq")))
+		for (i in 1: ngenefusion) {
+			mynreads <- round(mycoverage[i]*nchar(sequence.fusions[i])/(2*readlength))
+			if (verbose == 0) {
+				system(paste("wgsim -d ", ins.size, " -r 0.0001 -R 0.001 -s ", sd.inssize, " -N ", mynreads, " -1 ", readlength, " -2 ", readlength," ", file.path(outputfolder, "IE", "data", paste("sim", formatted.count[jj], sep = "_"), paste("myref", formatted.count.fusions[i], ".fa", sep = "")), " " ,file.path(outputfolder, "IE", "data", paste("sim", formatted.count[jj], sep = "_"), "out.reads.1.fq")," ", file.path(outputfolder, "IE", "data", paste("sim", formatted.count[jj], sep = "_"), "out.reads.2.fq"), " 2>> ", file.path(outputfolder, "wgsim.log"), " 1>> ", file.path(outputfolder, "wgsim.log"), sep = ""))
+			} else {
+				system(paste("wgsim -d ", ins.size, " -r 0.0001 -R 0.001 -s ", sd.inssize, " -N ", mynreads, " -1 ", readlength, " -2 ", readlength," ", file.path(outputfolder, "IE", "data", paste("sim", formatted.count[jj], sep = "_"), paste("myref", formatted.count.fusions[i], ".fa", sep = "")), " " ,file.path(outputfolder, "IE", "data", paste("sim", formatted.count[jj], sep = "_"), "out.reads.1.fq")," ", file.path(outputfolder, "IE", "data", paste("sim", formatted.count[jj], sep = "_"), "out.reads.2.fq"), sep = ""))
+				
+			}
+			system(paste("cat", file.path(outputfolder, "IE", "data", paste("sim", formatted.count[jj], sep = "_"), "out.reads.1.fq"), ">>",  file.path(outputfolder, "IE", "reads", paste("sim", formatted.count[jj], sep = "_"), "fusions.reads.1.fq")))
+			system(paste("cat", file.path(outputfolder, "IE", "data", paste("sim", formatted.count[jj], sep = "_"), "out.reads.2.fq"), ">>",  file.path(outputfolder, "IE", "reads", paste("sim", formatted.count[jj], sep = "_"), "fusions.reads.2.fq")))
+		}
+		
+		if (flag.background == 1) {
+			
+			system(paste("cat ", file.path(outputfolder, "IE", "reads", paste("sim", formatted.count[jj], sep = "_"), "fusions.reads.1.fq"), " ",  file.path(outputfolder, "background.reads.1.fq"), " > ", file.path(outputfolder, "IE", "reads", paste("sim", formatted.count[jj], sep = "_"), "total.reads.1.fq"), sep = ""))
+			system(paste("cat ", file.path(outputfolder, "IE", "reads", paste("sim", formatted.count[jj], sep = "_"), "fusions.reads.2.fq"), " ",  file.path(outputfolder, "background.reads.2.fq"), " > ", file.path(outputfolder, "IE", "reads", paste("sim", formatted.count[jj], sep = "_"), "total.reads.2.fq"), sep = ""))
+		}
+		system(paste("rm", file.path(outputfolder, "IE", "data", paste("sim", formatted.count[jj], sep = "_"), "out.reads.1.fq"))) 
+		system(paste("rm", file.path(outputfolder, "IE", "data", paste("sim", formatted.count[jj], sep = "_"), "out.reads.2.fq"))) 
+	}
+	if (flag.background == 1) {
+		system(paste("rm", file.path(outputfolder, "background.reads.1.fq")))
+		system(paste("rm", file.path(outputfolder, "background.reads.2.fq")))
+	}
+	system(paste("rm", file.path(outputfolder, "wgsim.log")))
+	cat(" done. \n")
+}
+
diff --git a/lib/R/UpdateDB.R b/lib/R/UpdateDB.R
new file mode 100644
index 0000000..ff4ad5c
--- /dev/null
+++ b/lib/R/UpdateDB.R
@@ -0,0 +1,24 @@
+vars.tmp <- commandArgs()
+vars <- vars.tmp[length(vars.tmp)]
+split.vars <- unlist(strsplit(vars, ","))
+ericscriptfolder <- split.vars [1]
+dbfolder <- split.vars[2]
+
+mydblist.tmp <- list.files(file.path(dbfolder, "data")) 
+if (length(mydblist.tmp) > 1) {
+  mydblist <- mydblist.tmp[-which(mydblist.tmp == "_resources")]
+  flag <- scan(file.path(ericscriptfolder, "lib", "data", "_resources", ".flag.updatedb"), what = "numeric", quiet = T)
+  if (flag == 0) {
+    cat("[EricScript] Nothing to update. Exit.\n", sep = "")    
+  } else
+  {  
+    cat("[EricScript] Found a new release of Ensembl Gene. Updating database for ", toString(mydblist),".\n", sep = "")    
+    for (i in 1: length(mydblist)) {
+      system(paste("sh", file.path(ericscriptfolder, "lib", "bash", "BuildSeq.sh"), ericscriptfolder, mydblist[i]))
+    }
+  }
+} else {
+  cat("[EricScript] No database was found in ", file.path(dbfolder, "data"), ". Please run ericscript.pl --downdb to download your databases.\n", sep = "")
+}
+
+
diff --git a/lib/bash/BuildSeq.sh b/lib/bash/BuildSeq.sh
new file mode 100644
index 0000000..f1ffdef
--- /dev/null
+++ b/lib/bash/BuildSeq.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+ericscriptfolder=$1
+refid=$2
+dbfolder=$3
+ensversion=$4
+myrandomn=$RANDOM
+tmpfolder=$dbfolder/".tmp_"$myrandomn
+mkdir $tmpfolder
+printf "[EricScript] Downloading $refid data. This process may take from few minutes to few hours depending on the selected genome ..."
+R --slave --args $ericscriptfolder,$dbfolder,$refid,$tmpfolder,$ensversion < $ericscriptfolder/lib/R/DownloadDB.R
+flagrefid=`cat $tmpfolder/.refid.flag` 
+if [ $flagrefid -eq 1 ]
+then
+bedtools sort -i $tmpfolder/exonstartend.txt | bedtools merge -c 4 -o collapse -i - | cut -d ',' -f1 - | awk '{print $4"\t"($2-1)"\t"$3"\t"$1}' - > $tmpfolder/exonstartend.mrg.txt
+seqtk subseq $tmpfolder/seq.fa.gz $tmpfolder/exonstartend.mrg.txt > $tmpfolder/subseq.fa
+printf "done.\n"
+printf "[EricScript] Creating database for $refid ..."
+R --slave --args $ericscriptfolder,$refid,$dbfolder,$tmpfolder < $ericscriptfolder/lib/R/BuildExonUnionModel.R
+R --slave --args $ericscriptfolder,$refid,$dbfolder,$tmpfolder < $ericscriptfolder/lib/R/ConvertTxt2R.R
+R --slave --args $refid,$dbfolder,$tmpfolder < $ericscriptfolder/lib/R/CreateDataEricTheSimulator.R 
+if [ $refid == "homo_sapiens" ]
+then
+seqtk subseq -l 50 $tmpfolder/seq.fa.gz $tmpfolder/chrlist > $dbfolder/data/$refid/allseq.fa
+else
+gunzip -c -d $tmpfolder/seq.fa.gz > $dbfolder/data/$refid/allseq.fa
+fi
+printf "done.\n"
+printf "[EricScript] Building reference indexes with BWA for transcriptome and genome ..."
+bwa index $dbfolder/data/$refid/allseq.fa 1>> $tmpfolder/.tmp.log 2>> $tmpfolder/.tmp.log
+bwa index $dbfolder/data/$refid/EnsemblGene.Reference.fa 1>> $tmpfolder/.tmp.log 2>> $tmpfolder/.tmp.log
+printf "done.\n"
+fi
+printf "[EricScript] Removing temporary files ..."
+rm -r $tmpfolder
+printf "done.\n"
diff --git a/lib/bash/RunEric.sh b/lib/bash/RunEric.sh
new file mode 100644
index 0000000..bb28e89
--- /dev/null
+++ b/lib/bash/RunEric.sh
@@ -0,0 +1,276 @@
+#!/bin/bash
+## bash pipeline
+outputfolder=$1
+. $outputfolder/ericscript.vars 
+if [ $verbose -eq 1 ]; then
+touch $outputfolder/out/.ericscript.log
+fi
+printf "\n[EricScript] Starting EricScript analysis for sample $samplename.\n"
+if [ $flagbin -eq 0 ]; then
+readlength=$((`head -n 2 $reads_1 | tail -n 1 | wc -m` - 1))
+else
+readlength=$((`gunzip -c $reads_1 | head -n 2 | tail -n 1 | wc -m` - 1))
+fi
+if [ $ntrim -lt $readlength -a $ntrim -ne -1 ]; then
+if [ $ntrim -lt 36 -a $ntrim -ge 0 ]; then
+printf "[EricScript] Minimum trimming value is 36. Reads will be trimmed to 36 nt.\n"
+ntrim=36
+fi
+if [ $ntrim -eq -1 -a $readlength -ge 70 ]; then
+ntrim=50
+fi
+printf "[EricScript] Trimming PE reads to $ntrim nt ..."
+if [ $flagbin -eq 0 ]; then
+perl $ericscriptfolder/lib/perl/trimfq.pl $reads_1 $ntrim $outputfolder/aln/$samplename.1.fq.trimmed
+perl $ericscriptfolder/lib/perl/trimfq.pl $reads_2 $ntrim $outputfolder/aln/$samplename.2.fq.trimmed
+printf "done. \n"
+else
+gunzip -c $reads_1 | perl $ericscriptfolder/lib/perl/trimfq.pl - $ntrim $outputfolder/aln/$samplename.1.fq.trimmed
+gunzip -c $reads_2 | perl $ericscriptfolder/lib/perl/trimfq.pl - $ntrim $outputfolder/aln/$samplename.2.fq.trimmed
+fi
+if [ $verbose -eq 0 ]; then
+printf "[EricScript] Aligning with bwa ..."
+if [ $bwa_aln -eq 1 ]; then
+bwa aln -R 5 -t $nthreads $myref $outputfolder/aln/$samplename.1.fq.trimmed > $outputfolder/aln/"$samplename"_1.sai 2>> $outputfolder/out/.ericscript.log
+bwa aln -R 5 -t $nthreads $myref $outputfolder/aln/$samplename.2.fq.trimmed > $outputfolder/aln/"$samplename"_2.sai 2>> $outputfolder/out/.ericscript.log
+fi
+if [ $MAPQ -gt 0 ]; then
+if [ $bwa_aln -eq 1 ]; then
+bwa sampe -P -c 0.001 $myref $outputfolder/aln/"$samplename"_1.sai $outputfolder/aln/"$samplename"_2.sai $outputfolder/aln/$samplename.1.fq.trimmed $outputfolder/aln/$samplename.2.fq.trimmed > $outputfolder/aln/"$samplename".sam 2>> $outputfolder/out/.ericscript.log
+else
+bwa mem -t $nthreads $myref $outputfolder/aln/$samplename.1.fq.trimmed $outputfolder/aln/$samplename.2.fq.trimmed > $outputfolder/aln/"$samplename".sam 2>> $outputfolder/out/.ericscript.log
+fi
+else
+if [ $bwa_aln -eq 1 ]; then
+bwa sampe -P -c 0.001 $myref $outputfolder/aln/"$samplename"_1.sai $outputfolder/aln/"$samplename"_2.sai $outputfolder/aln/$samplename.1.fq.trimmed $outputfolder/aln/$samplename.2.fq.trimmed > $outputfolder/aln/tmp.sam 2>> $outputfolder/out/.ericscript.log
+else
+bwa mem -Y -t $nthreads $myref $outputfolder/aln/$samplename.1.fq.trimmed $outputfolder/aln/$samplename.2.fq.trimmed > $outputfolder/aln/tmp.sam 2>> $outputfolder/out/.ericscript.log
+fi
+cat $outputfolder/aln/tmp.sam | $ericscriptfolder/lib/perl/xa2multi.pl > $outputfolder/aln/"$samplename".sam
+fi
+else
+printf "[EricScript] Aligning with bwa ...\n"
+if [ $bwa_aln -eq 1 ]; then
+bwa aln -t $nthreads $myref $outputfolder/aln/$samplename.1.fq.trimmed > $outputfolder/aln/"$samplename"_1.sai
+bwa aln -t $nthreads $myref $outputfolder/aln/$samplename.2.fq.trimmed > $outputfolder/aln/"$samplename"_2.sai
+fi
+if [ $MAPQ -gt 0 ]; then
+if [ $bwa_aln -eq 1 ]; then
+bwa sampe -P -c 0.001 $myref $outputfolder/aln/"$samplename"_1.sai $outputfolder/aln/"$samplename"_2.sai $outputfolder/aln/$samplename.1.fq.trimmed $outputfolder/aln/$samplename.2.fq.trimmed > $outputfolder/aln/"$samplename".sam
+else
+bwa mem -t $nthreads $myref $outputfolder/aln/$samplename.1.fq.trimmed $outputfolder/aln/$samplename.2.fq.trimmed > $outputfolder/aln/"$samplename".sam
+fi
+else
+if [ $bwa_aln -eq 1 ]; then
+bwa sampe -P -c 0.001 $myref $outputfolder/aln/"$samplename"_1.sai $outputfolder/aln/"$samplename"_2.sai $outputfolder/aln/$samplename.1.fq.trimmed $outputfolder/aln/$samplename.2.fq.trimmed | $ericscriptfolder/lib/perl/xa2multi.pl > $outputfolder/aln/"$samplename".sam
+else
+bwa mem -Y -t $nthreads $myref $outputfolder/aln/$samplename.1.fq.trimmed $outputfolder/aln/$samplename.2.fq.trimmed | $ericscriptfolder/lib/perl/xa2multi.pl > $outputfolder/aln/"$samplename".sam
+fi
+fi
+fi
+else
+if [ $ntrim -ge $readlength ]; then
+printf "[EricScript] Selected trimming value is greater equal to read length. Reads will not be trimmed.\n"
+fi
+if [ $verbose -eq 0 ]; then
+printf "[EricScript] Aligning with bwa ..."
+if [ $bwa_aln -eq 1 ]; then
+bwa aln -R 5 -t $nthreads $myref $reads_1 > $outputfolder/aln/"$samplename"_1.sai 2>> $outputfolder/out/.ericscript.log
+bwa aln -R 5 -t $nthreads $myref $reads_2 > $outputfolder/aln/"$samplename"_2.sai 2>> $outputfolder/out/.ericscript.log
+fi
+if [ $MAPQ -gt 0 ]; then
+if [ $bwa_aln -eq 1 ]; then
+bwa sampe -P -c 0.001 $myref $outputfolder/aln/"$samplename"_1.sai $outputfolder/aln/"$samplename"_2.sai $reads_1 $reads_2 > $outputfolder/aln/"$samplename".sam 2>> $outputfolder/out/.ericscript.log
+else
+bwa mem -t $nthreads $myref $reads_1 $reads_2 > $outputfolder/aln/"$samplename".sam 2>> $outputfolder/out/.ericscript.log
+fi
+else
+if [ $bwa_aln -eq 1 ]; then
+bwa sampe -P -c 0.001 $myref $outputfolder/aln/"$samplename"_1.sai $outputfolder/aln/"$samplename"_2.sai $reads_1 $reads_2 > $outputfolder/aln/tmp.sam 2>> $outputfolder/out/.ericscript.log
+else
+bwa mem -Y -t $nthreads $myref $reads_1 $reads_2 > $outputfolder/aln/tmp.sam 2>> $outputfolder/out/.ericscript.log
+fi
+cat $outputfolder/aln/tmp.sam | $ericscriptfolder/lib/perl/xa2multi.pl > $outputfolder/aln/"$samplename".sam
+fi
+else
+printf "[EricScript] Aligning with bwa ...\n"
+if [ $bwa_aln -eq 1 ]; then
+bwa aln -t $nthreads $myref $reads_1 > $outputfolder/aln/"$samplename"_1.sai 
+bwa aln -t $nthreads $myref $reads_2 > $outputfolder/aln/"$samplename"_2.sai 
+fi
+if [ $MAPQ -gt 0 ]; then
+if [ $bwa_aln -eq 1 ]; then
+bwa sampe -P -c 0.001 $myref $outputfolder/aln/"$samplename"_1.sai $outputfolder/aln/"$samplename"_2.sai $reads_1 $reads_2 > $outputfolder/aln/"$samplename".sam 
+else
+bwa mem -t $nthreads $myref $reads_1 $reads_2 > $outputfolder/aln/"$samplename".sam
+fi
+else
+if [ $bwa_aln -eq 1 ]; then
+bwa sampe -P -c 0.001 $myref $outputfolder/aln/"$samplename"_1.sai $outputfolder/aln/"$samplename"_2.sai $reads_1 $reads_2 | $ericscriptfolder/lib/perl/xa2multi.pl > $outputfolder/aln/"$samplename".sam
+else
+bwa mem -Y -t $nthreads $myref $reads_1 $reads_2 | $ericscriptfolder/lib/perl/xa2multi.pl > $outputfolder/aln/"$samplename".sam
+fi
+fi
+fi
+fi
+printf "done. \n"
+R --slave --args $outputfolder,$bwa_aln <  $ericscriptfolder/lib/R/ExtractInsertSize.R
+printf "[EricScript] Extracting discordant alignments ... "
+grep -v '^@' $outputfolder/aln/"$samplename".sam | awk -v mapq="$MAPQ" '(($7!="=") && ($7!="*") && ($5>=mapq)) { print }' | cut -f2,3,4,5,7,8 > $outputfolder/out/"$samplename".filtered.out
+R --slave --args $samplename,$outputfolder,$ericscriptfolder,$minreads,$MAPQ,$refid,$dbfolder < $ericscriptfolder/lib/R/MakeAdjacencyMatrix.R
+myflag=`cat $outputfolder/out/.ericscript.flag`
+if [ $myflag -eq 0 ]; then
+printf "done. \n"
+printf "[EricScript] No chimeric transcripts found! Writing results ..."
+R --slave --args $samplename,$outputfolder <  $ericscriptfolder/lib/R/MakeEmptyResults.R
+printf "done. \n"
+exit 1
+fi
+printf "done. \n"
+printf "[EricScript] Building exon junction reference ... "
+R --slave --args $samplename,$outputfolder,$ericscriptfolder,$readlength,$refid,$dbfolder < $ericscriptfolder/lib/R/BuildFasta.R
+printf "done. \n"
+## Aligning to putative junction reference
+if [ $verbose -eq 0 ]; then
+printf "[EricScript] Aligning to exon junction reference ... "
+bwa index $mynewref 1>> $outputfolder/out/.ericscript.log 2>> $outputfolder/out/.ericscript.log
+if [ $bwa_aln -eq 1 ]; then
+bwa aln -t $nthreads $mynewref $reads_1 > $outputfolder/aln/"$samplename"_1.remap.sai 2>> $outputfolder/out/.ericscript.log
+bwa aln -t $nthreads $mynewref $reads_2 > $outputfolder/aln/"$samplename"_2.remap.sai 2>> $outputfolder/out/.ericscript.log
+fi
+if [ $MAPQ -gt 0 ]; then
+if [ $bwa_aln -eq 1 ]; then
+bwa sampe -P $mynewref $outputfolder/aln/"$samplename"_1.remap.sai $outputfolder/aln/"$samplename"_2.remap.sai $reads_1 $reads_2 > $outputfolder/aln/$samplename.remap.sam 2>> $outputfolder/out/.ericscript.log
+else
+bwa mem -t $nthreads $mynewref $reads_1 $reads_2 > $outputfolder/aln/$samplename.remap.sam 2>> $outputfolder/out/.ericscript.log
+fi
+else
+if [ $bwa_aln -eq 1 ]; then
+bwa sampe -P $mynewref $outputfolder/aln/"$samplename"_1.remap.sai $outputfolder/aln/"$samplename"_2.remap.sai $reads_1 $reads_2 > $outputfolder/aln/tmp.sam 2>> $outputfolder/out/.ericscript.log
+else
+bwa mem -Y -t $nthreads $mynewref $reads_1 $reads_2 > $outputfolder/aln/tmp.sam 2>> $outputfolder/out/.ericscript.log
+fi
+cat $outputfolder/aln/tmp.sam | $ericscriptfolder/lib/perl/xa2multi.pl > $outputfolder/aln/$samplename.remap.sam
+fi
+samtools view -@ $nthreads -bS -o $outputfolder/aln/$samplename.remap.bam $outputfolder/aln/$samplename.remap.sam 1>> $outputfolder/out/.ericscript.log 2>> $outputfolder/out/.ericscript.log
+samtools sort -@ $nthreads $outputfolder/aln/$samplename.remap.bam $outputfolder/aln/$samplename.remap.sorted 1>> $outputfolder/out/.ericscript.log 2>> $outputfolder/out/.ericscript.log
+samtools index $outputfolder/aln/$samplename.remap.sorted.bam 1>> $outputfolder/out/.ericscript.log
+else
+printf "[EricScript] Aligning to exon junction reference ... \n"
+bwa index $mynewref
+if [ $bwa_aln -eq 1 ]; then
+bwa aln -t $nthreads $mynewref $reads_1 > $outputfolder/aln/"$samplename"_1.remap.sai 
+bwa aln -t $nthreads $mynewref $reads_2 > $outputfolder/aln/"$samplename"_2.remap.sai 
+fi
+if [ $MAPQ -gt 0 ]; then
+if [ $bwa_aln -eq 1 ]; then
+bwa sampe -P $mynewref $outputfolder/aln/"$samplename"_1.remap.sai $outputfolder/aln/"$samplename"_2.remap.sai $reads_1 $reads_2 > $outputfolder/aln/$samplename.remap.sam 
+else
+bwa mem -t $nthreads $mynewref $reads_1 $reads_2 > $outputfolder/aln/$samplename.remap.sam
+fi
+else
+if [ $bwa_aln -eq 1 ]; then
+bwa sampe -P $mynewref $outputfolder/aln/"$samplename"_1.remap.sai $outputfolder/aln/"$samplename"_2.remap.sai $reads_1 $reads_2 | $ericscriptfolder/lib/perl/xa2multi.pl > $outputfolder/aln/$samplename.remap.sam 
+else
+bwa mem -Y -t $nthreads $mynewref $reads_1 $reads_2 | $ericscriptfolder/lib/perl/xa2multi.pl > $outputfolder/aln/$samplename.remap.sam
+fi
+fi
+samtools view -@ $nthreads -bS -o $outputfolder/aln/$samplename.remap.bam $outputfolder/aln/$samplename.remap.sam
+samtools sort -@ $nthreads $outputfolder/aln/$samplename.remap.bam $outputfolder/aln/$samplename.remap.sorted
+samtools index $outputfolder/aln/$samplename.remap.sorted.bam
+fi
+printf "done. \n"
+## Recalibrating junctions
+printf "[EricScript] Recalibrating junctions ... "
+R --slave --args $samplename,$outputfolder,$readlength,$verbose < $ericscriptfolder/lib/R/RecalibrateJunctions.R
+cat $outputfolder/out/$samplename.EricScript.junctions.recalibrated.fa $myref > $mynewref_recal
+printf "done. \n"
+## Aligning not properly mapped reads
+if [ $verbose -eq 0 ]; then
+printf "[EricScript] Aligning to recalibrated junction reference ... "
+bwa index $mynewref_recal 1>> $outputfolder/out/.ericscript.log 2>> $outputfolder/out/.ericscript.log
+if [ $bwa_aln -eq 1 ]; then
+bwa aln -R 5 -t $nthreads $mynewref_recal $reads_1 > $outputfolder/aln/"$samplename"_1.remap.recal.sai 2>> $outputfolder/out/.ericscript.log
+bwa aln -R 5 -t $nthreads $mynewref_recal $reads_2 > $outputfolder/aln/"$samplename"_2.remap.recal.sai 2>> $outputfolder/out/.ericscript.log
+bwa sampe -P $mynewref_recal $outputfolder/aln/"$samplename"_1.remap.recal.sai $outputfolder/aln/"$samplename"_2.remap.recal.sai $reads_1 $reads_2 > $outputfolder/aln/tmp.sam 2>> $outputfolder/out/.ericscript.log
+else
+bwa mem -Y -t $nthreads $mynewref_recal $reads_1 $reads_2 > $outputfolder/aln/tmp.sam 2>> $outputfolder/out/.ericscript.log
+fi
+cat $outputfolder/aln/tmp.sam | $ericscriptfolder/lib/perl/xa2multi.pl > $outputfolder/aln/$samplename.remap.recal.sam
+samtools view -@ $nthreads -bt $mynewref_recal -o $outputfolder/aln/$samplename.remap.recal.bam $outputfolder/aln/$samplename.remap.recal.sam 1>> $outputfolder/out/.ericscript.log 2>> $outputfolder/out/.ericscript.log
+samtools sort -@ $nthreads $outputfolder/aln/$samplename.remap.recal.bam $outputfolder/aln/$samplename.remap.recal.sorted 1>> $outputfolder/out/.ericscript.log 2>> $outputfolder/out/.ericscript.log
+samtools rmdup $outputfolder/aln/$samplename.remap.recal.sorted.bam $outputfolder/aln/$samplename.remap.recal.sorted.rmdup.bam 1>> $outputfolder/out/.ericscript.log 2>> $outputfolder/out/.ericscript.log
+samtools index $outputfolder/aln/$samplename.remap.recal.sorted.rmdup.bam 1>> $outputfolder/out/.ericscript.log 
+samtools view -@ $nthreads -b -h -q 1 $outputfolder/aln/$samplename.remap.recal.sorted.rmdup.bam > $outputfolder/aln/$samplename.remap.recal.sorted.rmdup.q1.bam
+samtools index $outputfolder/aln/$samplename.remap.recal.sorted.rmdup.q1.bam
+else
+printf "[EricScript] Aligning to recalibrated junction reference ... \n"
+bwa index  $mynewref_recal 
+if [ $bwa_aln -eq 1 ]; then
+bwa aln -R 5 -t $nthreads $mynewref_recal $reads_1 > $outputfolder/aln/"$samplename"_1.remap.recal.sai 
+bwa aln -R 5 -t $nthreads $mynewref_recal $reads_2 > $outputfolder/aln/"$samplename"_2.remap.recal.sai 
+bwa sampe -P $mynewref_recal $outputfolder/aln/"$samplename"_1.remap.recal.sai $outputfolder/aln/"$samplename"_2.remap.recal.sai $reads_1 $reads_2 | $ericscriptfolder/lib/perl/xa2multi.pl > $outputfolder/aln/$samplename.remap.recal.sam 
+else
+bwa mem -Y -t $nthreads $mynewref_recal $reads_1 $reads_2 | $ericscriptfolder/lib/perl/xa2multi.pl > $outputfolder/aln/$samplename.remap.recal.sam
+fi
+samtools view -@ $nthreads -bt $mynewref_recal -o $outputfolder/aln/$samplename.remap.recal.bam $outputfolder/aln/$samplename.remap.recal.sam 
+samtools sort -@ $nthreads $outputfolder/aln/$samplename.remap.recal.bam $outputfolder/aln/$samplename.remap.recal.sorted
+samtools rmdup $outputfolder/aln/$samplename.remap.recal.sorted.bam $outputfolder/aln/$samplename.remap.recal.sorted.rmdup.bam 
+samtools index $outputfolder/aln/$samplename.remap.recal.sorted.rmdup.bam 
+samtools view -@ $nthreads -b -h -q 1 $outputfolder/aln/$samplename.remap.recal.sorted.rmdup.bam > $outputfolder/aln/$samplename.remap.recal.sorted.rmdup.q1.bam
+samtools index $outputfolder/aln/$samplename.remap.recal.sorted.rmdup.q1.bam
+fi
+printf "done. \n"
+samtools idxstats $outputfolder/aln/$samplename.remap.recal.sorted.rmdup.q1.bam > $outputfolder/out/$samplename.stats
+rm $outputfolder/aln/*.sam
+## Estimating spanning reads
+printf "[EricScript] Scoring candidate fusions ..."
+R --slave --args $samplename,$outputfolder,$readlength <  $ericscriptfolder/lib/R/EstimateSpanningReads.R
+myflag=`cat $outputfolder/out/.ericscript.flag`
+if [ $myflag -eq 0 ]; then
+printf "done. \n"
+printf "[EricScript] No chimeric transcripts found! Writing results ..."
+R --slave --args $samplename,$outputfolder <  $ericscriptfolder/lib/R/MakeEmptyResults.R
+printf "done. \n"
+exit 1
+fi
+printf "done. \n"
+printf "[EricScript] Filtering candidate fusions ..."
+if [ $verbose -eq 0 ]; then
+samtools mpileup -A -f $mynewref_recal -l $outputfolder/out/$samplename.intervals $outputfolder/aln/$samplename.remap.recal.sorted.rmdup.bam > $outputfolder/out/$samplename.remap.recal.sorted.rmdup.pileup 2>> $outputfolder/out/.ericscript.log
+else
+samtools mpileup -A -f $mynewref_recal -l $outputfolder/out/$samplename.intervals $outputfolder/aln/$samplename.remap.recal.sorted.rmdup.bam > $outputfolder/out/$samplename.remap.recal.sorted.rmdup.pileup
+fi
+cut -f1,2,3 $outputfolder/out/$samplename.remap.recal.sorted.rmdup.pileup | grep -e '[0-9]----[a-z | A-Z]' - >  $outputfolder/out/$samplename.intervals.pileup
+R --slave --args $samplename,$outputfolder <  $ericscriptfolder/lib/R/BuildNeighbourhoodSequences.R
+if [ $verbose -eq 0 ]; then
+blat $myref $outputfolder/out/.link $outputfolder/out/$samplename.checkselfhomology.blat -out=blast8 1>> $outputfolder/out/.ericscript.log
+else
+blat $myref $outputfolder/out/.link $outputfolder/out/$samplename.checkselfhomology.blat -out=blast8
+fi
+R --slave --args $samplename,$outputfolder <  $ericscriptfolder/lib/R/CheckSelfHomology.R
+myflag=`cat $outputfolder/out/.ericscript.flag`
+if [ $myflag -eq 0 ]; then
+printf "done. \n"
+printf "[EricScript] No chimeric transcripts found! Writing results ..."
+R --slave --args $samplename,$outputfolder <  $ericscriptfolder/lib/R/MakeEmptyResults.R
+printf "done. \n"
+exit 1
+fi
+printf "done. \n"
+## Writing results
+if [ $verbose -eq 0 ]; then
+printf "[EricScript] Writing results ... "
+else
+printf "[EricScript] Writing results ... \n"
+fi
+R --slave --args $samplename,$outputfolder,$ericscriptfolder,$readlength,$verbose,$refid,$dbfolder <  $ericscriptfolder/lib/R/MakeResults.R
+printf "done. \n"
+rm $outputfolder/out/*fai
+if [ $removetemp -eq 1 ]; then
+printf "[EricScript] Removing temporary files ... "
+rm -r $outputfolder/aln
+rm -r $outputfolder/out
+printf "done. \n"
+fi 
+printf "[EricScript] Open $outputfolder/$samplename.results* to view the results of EricScript analysis.\n"
diff --git a/lib/demo/myreads_1.fq.gz b/lib/demo/myreads_1.fq.gz
new file mode 100644
index 0000000..74520e3
Binary files /dev/null and b/lib/demo/myreads_1.fq.gz differ
diff --git a/lib/demo/myreads_2.fq.gz b/lib/demo/myreads_2.fq.gz
new file mode 100644
index 0000000..4a7e168
Binary files /dev/null and b/lib/demo/myreads_2.fq.gz differ
diff --git a/lib/perl/retrievefrombiomart.pl b/lib/perl/retrievefrombiomart.pl
new file mode 100755
index 0000000..919a1dd
--- /dev/null
+++ b/lib/perl/retrievefrombiomart.pl
@@ -0,0 +1,75 @@
+# script by BioMart 
+# feb16:  mod with selection of genome release
+
+use strict;
+use LWP::UserAgent;
+
+my $ensversion = $ARGV[1];
+
+open (FH,$ARGV[0]) || die ("\nUsage: perl webExample.pl Query.xml\n\n");
+
+my $xml;
+while (<FH>){
+    $xml .= $_;
+}
+close(FH);
+
+my $path = "";
+
+if ($ensversion == 70) {
+        $path = "http://jan2013.archive.ensembl.org/biomart/martservice?";
+}
+elsif ($ensversion == 71) {
+        $path = "http://apr2013.archive.ensembl.org/biomart/martservice?";
+}
+elsif ($ensversion == 72) {
+        $path = "http://jun2013.archive.ensembl.org/biomart/martservice?";
+}
+elsif ($ensversion == 73) {
+        $path = "http://sep2013.archive.ensembl.org/biomart/martservice?";
+}
+elsif ($ensversion == 74) {
+        $path = "http://dec2013.archive.ensembl.org/biomart/martservice?";
+}
+elsif ($ensversion == 75) {
+        $path = "http://feb2014.archive.ensembl.org/biomart/martservice?";
+}
+elsif ($ensversion == 76) {
+        $path = "http://aug2014.archive.ensembl.org/biomart/martservice?";
+}
+elsif ($ensversion == 77) {
+        $path = "http://oct2014.archive.ensembl.org/biomart/martservice?";
+}
+elsif ($ensversion == 78) {
+        $path = "http://dec2014.archive.ensembl.org/biomart/martservice?";
+}
+elsif ($ensversion == 79) {
+        $path = "http://mar2015.archive.ensembl.org/biomart/martservice?";
+}
+elsif ($ensversion == 80) {
+        $path = "http://may2015.archive.ensembl.org/biomart/martservice?";
+}
+elsif ($ensversion == 81) {
+        $path = "http://jul2015.archive.ensembl.org/biomart/martservice?";
+}
+elsif ($ensversion == 82) {
+        $path = "http://sep2015.archive.ensembl.org/biomart/martservice?";
+} 
+elsif ($ensversion > 82) {
+	$path = "http://www.ensembl.org/biomart/martservice?";
+}
+my $request = HTTP::Request->new("POST",$path,HTTP::Headers->new(),'query='.$xml."\n");
+my $ua = LWP::UserAgent->new;
+my $response;
+
+$ua->request($request, 
+	     sub{   
+		 my($data, $response) = @_;
+		 if ($response->is_success) {
+		     print "$data";
+		 }
+		 else {
+		     warn ("Problems with the web server: ".$response->status_line);
+		 }
+	     },1000);
+
diff --git a/lib/perl/trimfq.pl b/lib/perl/trimfq.pl
new file mode 100755
index 0000000..27287c6
--- /dev/null
+++ b/lib/perl/trimfq.pl
@@ -0,0 +1,27 @@
+#!/usr/bin/perl
+use warnings;
+use strict;
+
+my($listin) = $ARGV[0];
+my($ntrim) = $ARGV[1];
+my($outfile) = $ARGV[2];
+
+open LIST, "${listin}" or die $!;
+open OUT, ">$outfile";
+my $a;
+my $count = 1;
+while (<LIST>)
+
+   {
+	if ($count++ % 2 == 0) {
+   	$a = substr($_, 0, $ntrim);
+   	print OUT "$a\n";
+   	} else 
+   	{
+   	print OUT "$_"; 
+   	}      
+   
+   }
+
+close LIST;
+close OUT;
diff --git a/lib/perl/xa2multi.pl b/lib/perl/xa2multi.pl
new file mode 100755
index 0000000..4e813be
--- /dev/null
+++ b/lib/perl/xa2multi.pl
@@ -0,0 +1,26 @@
+#!/usr/bin/perl -w
+
+use strict;
+use warnings;
+
+while (<>) {
+	if (/\tXA:Z:(\S+)/) {
+		my $l = $1;
+		print;
+		my @t = split("\t");
+		while ($l =~ /([^,;]+),([-+]\d+),([^,]+),(\d+);/g) {
+			my $mchr = ($t[6] eq $1)? '=' : $t[6]; # FIXME: TLEN/ISIZE is not calculated!
+			my $seq = $t[9];
+			my $phred = $t[10];
+			# if alternative alignment has other orientation than primary, 
+			# then print the reverse (complement) of sequence and phred string
+			if ((($t[1]&0x10)>0) xor ($2<0)) {
+				$seq = reverse $seq;
+				$seq =~ tr/ACGTacgt/TGCAtgca/;
+				$phred = reverse $phred;
+			}
+			print(join("\t", $t[0], ($t[1]&0x6e9)|($2<0?0x10:0), $1, abs($2), 0, $3, @t[6..7], 0, $seq, $phred, "NM:i:$4"), "\n");
+		}
+	} else { print; }
+}
+