diff --git a/.travis.yml b/.travis.yml index c721031..d3829fb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -23,9 +23,13 @@ install: - conda info -a # Replace dep1 dep2 ... with your dependencies - - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION dep1 dep2 ... - - source activate test-environment + - conda install --file requirements.txt + - conda install --file tests/requirements-dev.txt + - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then conda install unittest2; fi + - if [[ $TRAVIS_PYTHON_VERSION == '3.4' ]]; then conda install robotframework-python3; else conda install robotframework; fi - python setup.py install - script: - # Your test script goes here + - nosetests --with-coverage --cover-erase --cover-package=bioframework + - pybot tests/*.robot +after_success: + - coveralls diff --git a/bioframework/README.md b/bioframework/README.md new file mode 100644 index 0000000..dcf404a --- /dev/null +++ b/bioframework/README.md @@ -0,0 +1,35 @@ +# Simple jip pipeline + +[Jip Docs](http://pyjip.readthedocs.org/en/latest) + +# Dirty install + +Would be fun later to maybe make a task to install executables inside a jip task +using conda. 
import os.path
import sys


def normalize_handle(option, default=sys.stdin, mode='r'):
    '''
    Return an open file handle for a file option

    :param option: object with a ``get()`` method returning a path or a
        false value (``None``/empty string) when the option is unset
    :param default: handle returned as-is when the option is unset
    :param mode: mode passed to ``open`` when a path is set
    :return: newly opened handle (the caller must close it) or ``default``
    '''
    # Read the option once so the path that is tested is the path opened
    path = option.get()
    if path:
        return open(path, mode)
    return default


def format_from_ext(option_or_path):
    '''
    Return file format from option or path

    The format is the text after the last dot of the file name.  Anything
    exposing ``get()`` is treated as an option and its value is used as the
    path.

    >>> format_from_ext('/path/file.format')
    'format'
    >>> format_from_ext('file.format')
    'format'
    >>> format_from_ext('file')
    Traceback (most recent call last):
    ...
    ValueError: no extension
    >>> format_from_ext('file.')
    Traceback (most recent call last):
    ...
    ValueError: no extension

    :raises ValueError: if the path is unset/empty or has no extension
    '''
    pth = option_or_path
    if hasattr(option_or_path, 'get'):
        pth = option_or_path.get()
    # An unset option yields None/''; report it the same way as a missing
    # extension instead of letting splitext fail with an unrelated error
    if not pth:
        raise ValueError('no extension')
    ext = os.path.splitext(pth)[1]
    # ext is '' (no dot) or starts with '.'; a bare '.' carries no format
    if len(ext) < 2:
        raise ValueError('no extension')
    return ext[1:]
+ +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext + +help: + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " applehelp to make an Apple Help Book" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " xml to make Docutils-native XML files" + @echo " pseudoxml to make pseudoxml-XML files for display purposes" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + @echo " coverage to run coverage check of the documentation (if enabled)" + +clean: + rm -rf $(BUILDDIR)/* + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 
+ +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/SubsampleBam.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/SubsampleBam.qhc" + +applehelp: + $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp + @echo + @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." + @echo "N.B. You won't be able to view it unless you put it in" \ + "~/Library/Documentation/Help or install it in your application" \ + "bundle." + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/SubsampleBam" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/SubsampleBam" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 
+ @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." 
+ +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +coverage: + $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage + @echo "Testing of coverage in the sources finished, look at the " \ + "results in $(BUILDDIR)/coverage/python.txt." + +xml: + $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml + @echo + @echo "Build finished. The XML files are in $(BUILDDIR)/xml." + +pseudoxml: + $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml + @echo + @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..b8c5fed --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,325 @@ +# -*- coding: utf-8 -*- +# +# build configuration file, created by +# sphinx-quickstart on Fri Mar 27 15:46:28 2015. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys +import os +import shlex + + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. 

# Make the package importable from the repository root (parent of docs/)
sys.path.insert(0, os.path.dirname(os.path.abspath('.')))
import bioframework

on_rtd = os.environ.get('READTHEDOCS',None) == 'True'

if not on_rtd:
    try:
        import sphinx_rtd_theme
        html_theme = 'sphinx_rtd_theme'
        html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
    except ImportError:
        sys.stderr.write('You are missing the sphinx_rtd_theme\n')
        sys.stderr.write('Did you pip install docs/requirements.txt?\n')
        sys.exit(-1)

# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.intersphinx',
    'sphinx.ext.todo',
    'sphinx.ext.coverage',
    'sphinx.ext.ifconfig',
    'sphinx.ext.viewcode',
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'

# The encoding of source files.
#source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = bioframework.__projectname__
copyright = '2015, ' + bioframework.__authors__
author = bioframework.__authors__

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The full version, including alpha/beta/rc tags.
release = bioframework.__release__
# The short X.Y version. bioframework/__init__.py only defines __release__,
# so fall back to the first two components of the release string when no
# explicit __version__ attribute exists.
version = (
    getattr(bioframework, '__version__', None)
    or '.'.join(release.split('.')[:2])
)

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
+# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +#keep_warnings = False + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = True + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +#html_theme = 'alabaster' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. 
If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +#html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 
+#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Language to be used for generating the HTML full-text search index. +# Sphinx supports the following languages: +# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' +# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' +#html_search_language = 'en' + +# A dictionary with options for the search language support, empty by default. +# Now only 'ja' uses this config value +#html_search_options = {'type': 'default'} + +# The name of a javascript file (relative to the configuration directory) that +# implements a search results scorer. If empty, the default will be used. +#html_search_scorer = 'scorer.js' + +# Output file base name for HTML help builder. +htmlhelp_basename = 'bioframework' + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', + +# Latex figure (float) alignment +#'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + ( + master_doc, + bioframework.__projectname__ + '.tex', + bioframework.__projectname__ + ' Documentation', + bioframework.__authors__, + 'manual' + ), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. 
+#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ( + master_doc, + bioframework.__projectname__, + bioframework.__projectname__ + ' Documentation', + [author], 1 + ) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ( + master_doc, + bioframework.__projectname__, + bioframework.__projectname__ + ' Documentation', + author, + bioframework.__projectname__, + bioframework.__description__, + 'Miscellaneous' + ), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +#texinfo_no_detailmenu = False + + +# Example configuration for intersphinx: refer to the Python standard library. +intersphinx_mapping = {'https://docs.python.org/': None} diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..d7202b5 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,21 @@ +.. bioframework documentation master file, created by + sphinx-quickstart on Fri Mar 27 15:46:28 2015. 
+ You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to bioframework's documentation! +=========================================== + +Contents: + +.. toctree:: + :maxdepth: 2 + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..2666e8a --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,2 @@ +sphinx +sphinx_rtd_theme diff --git a/jip_modules/bwa_index.jip b/jip_modules/bwa_index.jip new file mode 100755 index 0000000..83d6f88 --- /dev/null +++ b/jip_modules/bwa_index.jip @@ -0,0 +1,20 @@ +#!/usr/bin/env jip +# bwa index +# +# Usage: +# bwa_index -r [-o ] +# +# Options: +# -r, --reference The fasta file to index +# -o, --output Where to create the index +# [Default: Same path as reference argument] + +#%begin init +options['output'].default = "${reference}.bwt" +#%end + +#%begin validate +#%end + +cp ${reference|abs} ${output|ext} +bwa index ${output|ext} diff --git a/jip_modules/bwa_mem.jip b/jip_modules/bwa_mem.jip new file mode 100755 index 0000000..e8a97ad --- /dev/null +++ b/jip_modules/bwa_mem.jip @@ -0,0 +1,17 @@ +#!/usr/bin/env jip +# bwa mem +# +# Usage: +# bwa_mem -r -f ... [-p] [-o ] +# +# Options: +# -r, --reference Fasta reference path +# -o, --output Output path [default: stdout] +# -f, --fastq ... 
Fastq files to map to reference [default: stdin] +# -p, --interleaved Flag to specify interleaved input [default: False] + +#%begin init +options['fastq'].streamable = True +#%end + +bwa mem ${interleaved|arg|else('')} ${reference} ${fastq|else("/dev/stdin")} ${output|arg(">")} diff --git a/jip_modules/convert_format.jip b/jip_modules/convert_format.jip new file mode 100755 index 0000000..6d66d41 --- /dev/null +++ b/jip_modules/convert_format.jip @@ -0,0 +1,46 @@ +#!/usr/bin/env jip +# +# Uses biopython to convert between sequence formats based on the +# extensions of the files +# +# Usage: +# convert_format [-i ] [--in-format ] [--out-format ] [-o ] +# +# Options: +# -i, --input= The input to convert. File extension dictates format +# unless stdin is specified, then you must supply +# the input format option. +# [Default: stdin] +# --in-format= This is autodetected via the extension of the input +# file, but can be overridden here. +# --out-format= Same as --in-format, but specifies the output format +# -o, --output= The output file path. Extension will dictate format. +# If unspecified the output will go to standard output +# and --out-format must be supplied. 
+# [Default: stdout] +#%begin validate +if not options.input.get() and not options.in_format: + validation_error("Have to supply input format if input is stdin") + +if not options.output.get() and not options.out_format: + validation_error("Have to supply output format if output is stdout") +#%end + +#%begin command python +import sys + +from Bio import SeqIO +from bioframework import util + +if '${in_format}': + informat = "${in_format}" +else: + informat = util.format_from_ext('${input}') + +if '${out_format}': + outformat = "${out_format}" +else: + outformat = util.format_from_ext('${output}') + +SeqIO.convert("${input|else('/dev/stdin')}", informat, "${output|else('/dev/stdout')}", outformat) +#%end diff --git a/jip_modules/cutadapt.jip b/jip_modules/cutadapt.jip new file mode 100755 index 0000000..1d58508 --- /dev/null +++ b/jip_modules/cutadapt.jip @@ -0,0 +1,40 @@ +#!/usr/bin/env jip +# Runs cutadapt with only quality filter +# +# usage: +# cutadapt -q ... [-i ] [-o ] [-p ] [--interleave] +# +# Options: +# -i, --input The input fastq [Default: stdin] +# -q, --qualcutoff The quality cutoff [Default: 25] +# -o, --output Output fastq for single read or interleaved(paired)[Default: stdout] +# -p, --outr2 Reverse output for paired fastq(non-interleaved) +# --interleave Interleave output [Default: False] + +#%begin init +#%end + +#%begin validate +#if options['interleave'] and len(options['fastq']) > 1: +# validation_error("Cannot specify more than 1 fastq file with --interleave") + +if options['interleave'] and len(options['outr2']) > 1: + validation_error("Cannot specify outr2 and --interleave") + +#if len(options['fastq']) != len(options['qualcutoff']): +# validation_error("qualcutoff must match number of fastq given") +#%end + +#%begin setup +''' +if len(options['fastq']) == 2: + options['qualcutoff'].join = ',' + + if not options['interleave']: + if not options['outr2']: + options['outr2'].set('output_r2.cutadapt') + options['output'].append(options['outr2']) 
def interleave(iter1, iter2):
    """
    Yield records alternately from two paired iterators

    Records are consumed in lockstep: one from ``iter1`` (forward) and then
    the matching one from ``iter2`` (reverse).  Iteration stops when either
    iterator is exhausted.

    :param iter1: iterable of forward records, each with an ``id`` attribute
    :param iter2: iterable of reverse records, each with an ``id`` attribute
    :raises AssertionError: if a forward/reverse pair has differing ids
    """
    # itertools.izip only exists on Python 2; the builtin zip is already
    # lazy on Python 3, so pick whichever is available
    try:
        from itertools import izip as lockstep
    except ImportError:
        lockstep = zip
    for forward, reverse in lockstep(iter1, iter2):
        # Raise explicitly so the pairing check still runs under ``python -O``
        if forward.id != reverse.id:
            raise AssertionError(
                "%s did not match %s" % (forward.id, reverse.id)
            )
        yield forward
        yield reverse
${output|arg(">")} diff --git a/jip_modules/sam_to_bam.jip b/jip_modules/sam_to_bam.jip new file mode 100755 index 0000000..3a110bc --- /dev/null +++ b/jip_modules/sam_to_bam.jip @@ -0,0 +1,11 @@ +#!/usr/bin/env jip +# Convert sam input to bam +# +# Usage: +# sam_to_bam [-i ] [-o ] +# +# Options: +# -i, --input Input sam [Default: stdin] +# -o, --output Output bam [Default: stdout] + +samtools view -bh - ${output|arg(">")} diff --git a/jip_modules/simplepipe.jip b/jip_modules/simplepipe.jip new file mode 100755 index 0000000..a50a0af --- /dev/null +++ b/jip_modules/simplepipe.jip @@ -0,0 +1,32 @@ +#!/usr/bin/env jip +# +# Simple pipeline to run cutadapt, bwa index, bwa mem, samtools view +# +# Usage: +# simplepipe -r -f ... [-q ...] [-o ] [--interleave] +# +# Options: +# -r, --reference Reference file to index and map to +# -f, --fastq ... List of fastq files to process +# -q, --qualcutoff ... The quality cutoff for the read trimming +# [Default: 25] +# -o, --output The output bam file +# [Default: mapped.bam] +# -i, --interleave To use interleave format as much as possible for stream +# [Default: False] + +#%begin pipeline +if options['interleave']: + fastq = run('paired_to_interleave', forward=options['fastq'].value[0], reverse=options['fastq'].value[1]) +else: + fastq = options['fastq'] +trimmed = run('cutadapt', input=fastq, qualcutoff=options['qualcutoff'], interleave=options['interleave']) +index_ref = run('bwa_index', reference=options['reference']) +sam = run('bwa_mem', reference=reference, fastq=trimmed) +ubam = run('sam_to_bam', input=sam) +bam = run('sort_bam', input=ubam, output=options['output']) +run('plot_bam', input=bam, output=options['output']+'.svg', format='svg') + +index_ref >> sam + +#%end diff --git a/jip_modules/sort_bam.jip b/jip_modules/sort_bam.jip new file mode 100755 index 0000000..e157a98 --- /dev/null +++ b/jip_modules/sort_bam.jip @@ -0,0 +1,12 @@ +#!/usr/bin/env jip +# +# Sort a bam file +# +# Usage: +# sort_bam [-i ] [-o ] +# +# 
from setuptools import setup, find_packages

import bioframework

# Package metadata comes from bioframework/__init__.py so it is declared in
# one place only.
setup(
    name = bioframework.__projectname__,
    version = bioframework.__release__,
    packages = find_packages(),
    author = bioframework.__authors__,
    # bioframework/__init__.py does not define __authoremails__; read it
    # defensively so installation does not fail with AttributeError
    author_email = getattr(bioframework, '__authoremails__', None),
    description = bioframework.__description__,
    license = "GPLv2",
    keywords = bioframework.__keywords__,
    entry_points = {
    },
    install_requires = [
        'pyjip',
    ]
)
+ ${result} = Run Process ${tool} -i ${in_fastq} --out-format fasta shell=True stderr=STDOUT + Log To Console ${result.stdout} + Should Be Equal As Integers ${result.rc} 0 + Should Be Equal As Strings ${result.stdout} ${fastaout} + +convert_format file to file + ${result} = Run Process rm ${testout}.fasta; ${tool} -i ${in_fastq} -o ${testout}.fasta shell=True stderr=STDOUT + Log To Console ${result.stdout} + Should Be Equal As Integers ${result.rc} 0 + ${actual_contents} = Get File ${testout}.fasta + ${actual_contents} = Strip String ${actual_contents} + Should Be Equal As Strings ${fastaout} ${actual_contents} + +convert_format stdin to stdout + ${result} = Run Process cat ${in_fastq} | ${tool} --in-format fastq --out-format fasta shell=True stderr=STDOUT + Log To Console ${result.stdout} + Should Be Equal As Integers ${result.rc} 0 + Should Be Equal As Strings ${result.stdout} ${fastaout} + +convert_format missing outformat + ${result} = Run Process ${tool} -i ${in_fastq} shell=True stderr=STDOUT + Log To Console ${result.stdout} + ${result} = Run Process ${tool} -i ${in_fastq} shell=True stderr=STDOUT + Should Be Equal As Integers ${result.rc} 1 + ${lines} Get Regexp Matches ${result.stdout} Have to supply output format if output is stdout + Should Be Equal As Strings @{lines}[0] Have to supply output format if output is stdout + +convert_format missing informat + ${result} = Run Process cat ${in_fastq} | ${tool} --out-format fasta shell=True stderr=STDOUT + Log To Console ${result.stdout} + Should Be Equal As Integers ${result.rc} 1 + ${lines} Get Regexp Matches ${result.stdout} Have to supply input format if input is stdin + Should Be Equal As Strings @{lines}[0] Have to supply input format if input is stdin diff --git a/tests/requirements-dev.txt b/tests/requirements-dev.txt new file mode 100644 index 0000000..dc8233e --- /dev/null +++ b/tests/requirements-dev.txt @@ -0,0 +1,3 @@ +nose +mock +python-coveralls diff --git a/tests/testinput/test.fastq 
b/tests/testinput/test.fastq new file mode 100644 index 0000000..4e346d2 --- /dev/null +++ b/tests/testinput/test.fastq @@ -0,0 +1,4 @@ +@id1 +ATGC ++ +!!!! diff --git a/tests/testinput/test.sff b/tests/testinput/test.sff new file mode 100644 index 0000000..17f0fc6 Binary files /dev/null and b/tests/testinput/test.sff differ diff --git a/tests/testoutput/output.fasta b/tests/testoutput/output.fasta new file mode 100644 index 0000000..bb59a8e --- /dev/null +++ b/tests/testoutput/output.fasta @@ -0,0 +1,2 @@ +>id1 +ATGC