Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
3862f71
Add kMetaShot and DM for kMetaShot
SantaMcCloud Oct 30, 2025
f251d0e
fix test and remove unneeded file
SantaMcCloud Oct 30, 2025
4ef6892
Update tools/kmetashot/kmetashot.xml
SantaMcCloud Nov 4, 2025
7e91306
Update tools/kmetashot/kmetashot.xml
SantaMcCloud Nov 4, 2025
a961b6c
Update data_managers/data_manager_kmetashot/data_manager_conf.xml
SantaMcCloud Nov 4, 2025
2953837
make file names shell safe
SantaMcCloud Nov 4, 2025
5e9d599
Merge branch 'add_kMetaShot' of https://github.com/SantaMcCloud/tools…
SantaMcCloud Nov 4, 2025
cfa9bf1
change help text for param
SantaMcCloud Nov 4, 2025
e10b88e
more information about the output
SantaMcCloud Nov 4, 2025
0acaf08
rewrote DM and change help section
SantaMcCloud Nov 4, 2025
4d0b6eb
add pyscript
SantaMcCloud Nov 4, 2025
6a8350d
fix test
SantaMcCloud Nov 4, 2025
a56eec4
fix linting
SantaMcCloud Nov 4, 2025
0acc0e1
typo
SantaMcCloud Nov 4, 2025
be65946
fix assert_content in test
SantaMcCloud Nov 4, 2025
c22e642
change DM to a single file tool
SantaMcCloud Nov 5, 2025
61157d7
remove .py file§
SantaMcCloud Nov 5, 2025
ed60219
chage profile version
SantaMcCloud Nov 5, 2025
e3d8e77
remove output label
SantaMcCloud Nov 5, 2025
a230944
change single quotes to double quotes
SantaMcCloud Nov 5, 2025
102fb8f
restart test
SantaMcCloud Nov 7, 2025
2ed91cf
fix test
SantaMcCloud Nov 7, 2025
e9bbec2
Merge branch 'add_kMetaShot' of https://github.com/SantaMcCloud/tools…
SantaMcCloud Nov 7, 2025
b1b05c1
maybe test fix
SantaMcCloud Nov 7, 2025
587e629
maybe test fix
SantaMcCloud Nov 7, 2025
4e6cfc8
maybe test fix
SantaMcCloud Nov 7, 2025
d7821e7
fix test
SantaMcCloud Nov 9, 2025
bd8a260
change download link from version 1
SantaMcCloud Nov 13, 2025
77d0f1b
Update kmetashot.xml
SantaMcCloud Nov 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions data_managers/data_manager_kmetashot/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Tool Shed metadata for the kMetaShot reference-data data manager.
name: kmetashot_build_database
owner: iuc
description: Data manager for kMetaShot reference data
long_description: Data manager for kMetaShot reference data
homepage_url: https://github.com/gdefazio/kMetaShot
remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_kmetashot
categories:
  - Data Managers
  - Metagenomics
type: unrestricted
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
<tool id="kmetashot_build_database" name="kMetaShot" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>database builder</description>
    <macros>
        <token name="@TOOL_VERSION@">2.0</token>
        <token name="@VERSION_SUFFIX@">0</token>
        <token name="@PROFILE@">24.1</token>
    </macros>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">kmetashot</requirement>
    </requirements>
    <!--
        Places the selected reference release in the extra-files directory of
        out_file and then copies the generated JSON into out_file itself.
        When the hidden "test" parameter is "true", an empty placeholder file is
        created instead of downloading anything.
    -->
    <command><![CDATA[
        mkdir -p "$out_file.extra_files_path" &&
        #if $test != "true":
            #if $release == "1":
                wget "https://zenodo.org/records/17591095/files/kMetaShot_reference.h5" &&
                mv "kMetaShot_reference.h5" "$out_file.extra_files_path" &&
            #else:
                wget "https://zenodo.org/records/17375120/files/kMetaShot_bacteria_archaea_2025-05-22.h5" &&
                mv "kMetaShot_bacteria_archaea_2025-05-22.h5" "$out_file.extra_files_path" &&
            #end if
        #else:
            touch "$out_file.extra_files_path/kMetaShot_bacteria_archaea_2025-05-22.h5" &&
        #end if
        cp "$dmjson" "$out_file"
    ]]></command>
    <!--
        Data-manager JSON describing the new kmetashot table row.
        "path" is deliberately a bare file name: data_manager_conf.xml inserts
        it into kmetashot/<value>/<path> for both the move target and the
        stored location, so an absolute path here would be nested underneath
        that directory instead of naming the file inside it.
    -->
    <configfiles>
        <configfile name="dmjson"><![CDATA[
{
  "data_tables":{
    "kmetashot":[
      {
        "dbkey":"kmetashot",
        "version":"${release}",
#if $test == "true":
        "path":"kMetaShot_bacteria_archaea_2025-05-22.h5",
        "name":"kMetaShot reference data 2025-05-22 - TEST",
        "value":"2025-05-22"
#else:
#if $release == "1":
        "path":"kMetaShot_reference.h5",
        "name":"kMetaShot reference data 2022-07-31",
        "value":"2022-07-31"
#else:
        "path":"kMetaShot_bacteria_archaea_2025-05-22.h5",
        "name":"kMetaShot reference data 2025-05-22",
        "value":"2025-05-22"
#end if
#end if
      }
    ]
  }
}]]>
        </configfile>
    </configfiles>
    <inputs>
        <param name="release" type="select" multiple="false" label="kMetaShot reference data release">
            <option value="1">First release</option>
            <option value="2">Second release</option>
        </param>
        <!-- Only set to "true" by the test below, to skip the download. -->
        <param name="test" type="hidden" value="" label="Run test"/>
    </inputs>
    <outputs>
        <data name="out_file" format="data_manager_json" />
    </outputs>
    <tests>
        <test expect_num_outputs="1">
            <param name="release" value="2"/>
            <param name="test" value="true"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text="25-05-22"/>
                    <has_text text="kMetaShot reference data 2025-05-22 - TEST"/>
                    <!-- The path must stay relative to the extra-files directory. -->
                    <has_text text="&quot;path&quot;:&quot;kMetaShot_bacteria_archaea_2025-05-22.h5&quot;"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
Download kMetaShot reference data from Zenodo and register it in the ``kmetashot`` data table.

- **First release**: ``kMetaShot_reference.h5`` (registered as 2022-07-31)
- **Second release**: ``kMetaShot_bacteria_archaea_2025-05-22.h5`` (registered as 2025-05-22)
    ]]></help>
    <citations>
        <citation type="doi">10.1038/s41592-023-01940-w</citation>
    </citations>
</tool>
20 changes: 20 additions & 0 deletions data_managers/data_manager_kmetashot/data_manager_conf.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<data_managers>
    <!-- Connects the kmetashot_build_database tool to the kmetashot data table. -->
    <data_manager id="kmetashot_build_database" tool_file="data_manager/kmetashot_datamanager.xml">
        <data_table name="kmetashot">
            <output>
                <!-- Plain columns with no move or translation rules. -->
                <column name="value"/>
                <column name="dbkey"/>
                <column name="name"/>
                <column name="version"/>
                <!--
                    The referenced file is moved below
                    ${GALAXY_DATA_MANAGER_DATA_PATH}/kmetashot/<value>/ and the
                    stored value is rewritten to that location as an absolute path.
                    NOTE(review): ${path} is interpolated verbatim into the target
                    and the translation, so this only yields a clean location when
                    the tool reports a bare file name; verify against the JSON it emits.
                -->
                <column name="path" output_ref="out_file">
                    <move type="file">
                        <source>${path}</source>
                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">kmetashot/${value}/${path}</target>
                    </move>
                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/kmetashot/${value}/${path}</value_translation>
                    <value_translation type="function">abspath</value_translation>
                </column>
            </output>
        </data_table>
    </data_manager>
</data_managers>
3 changes: 3 additions & 0 deletions data_managers/data_manager_kmetashot/test-data/kmetashot.loc
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@

25-05-22 kMetaShot-25-05-22 kMetaShot reference data 2025-05-22 2 /tmp/tmpf_hplx2a/galaxy-dev/tool-data/kmetashot/2/kMetaShot_bacteria_archaea_2025-05-22.h5
2025-05-22 kmetashot kMetaShot reference data 2025-05-22 - TEST 2 /home/sf373/sf373/galaxy/tool-data/kmetashot/2025-05-22/tmp/tmpt500ppxr/job_working_directory/000/1/outputs/dataset_46c18f80-8c8e-416d-ad88-f91119feab82_files/kMetaShot_bacteria_archaea_2025-05-22.h5
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#This is a sample file distributed with Galaxy that enables tools
#to use the kMetaShot database.
#You will need to create these data files using the following command

#wget [selected version] [url_from_download]

#The <version> column indicates which release of the kMetaShot reference data was downloaded

#25-05-22 kMetaShot-25-05-22 kMetaShot reference data 2025-05-22 2 /mnt/galaxyIndices/kMetaShot_database/kMetaShot_bacteria_archaea_2025-05-22.h5
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<tables>
    <!-- kmetashot data table backed by tool-data/kmetashot.loc -->
    <table name="kmetashot" allow_duplicate_entries="False" comment_char="#">
        <columns>value, dbkey, name, version, path</columns>
        <file path="tool-data/kmetashot.loc"/>
    </table>
</tables>
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<tables>
    <!-- kmetashot data table used for testing; reads the .loc file under test-data -->
    <table name="kmetashot" allow_duplicate_entries="False" comment_char="#">
        <columns>value, dbkey, name, version, path</columns>
        <file path="${__HERE__}/test-data/kmetashot.loc"/>
    </table>
</tables>
13 changes: 13 additions & 0 deletions tools/kmetashot/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Tool Shed metadata for the kMetaShot classifier tool.
categories:
  - Metagenomics
name: kmetashot
owner: iuc
type: unrestricted
homepage_url: https://github.com/gdefazio/kMetaShot
remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/main/tools/kmetashot
description: an alignment-free taxonomic classifier based on k-mer/minimizer counting
long_description: |
  kMetaShot, a bioinformatic approach relying on k-mer/minimizer profiling
  from the reference prokaryotic genomes, in order to build a concise
  representation of genomic diversity and perform MAG taxonomic
  classification up to the strain level
92 changes: 92 additions & 0 deletions tools/kmetashot/kmetashot.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
<tool id="kmetashot" name="kMetaShot" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>an alignment-free taxonomic classifier based on k-mer/minimizer counting</description>
    <macros>
        <token name="@TOOL_VERSION@">2.0</token>
        <token name="@VERSION_SUFFIX@">0</token>
        <token name="@PROFILE@">24.1</token>
    </macros>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">kmetashot</requirement>
    </requirements>
    <!--
        Links every input into ./bins under a sanitised name (characters other
        than word characters, whitespace and "-" become "_") with the Galaxy
        datatype as file extension, then runs the classifier on that directory.
        The literal directory names are left unquoted and kept on the same line
        as the executable so that the command line contains exactly the
        substrings checked by the assert_command tests below.
    -->
    <command detect_errors="exit_code">
        <![CDATA[
        #import re

        mkdir "output" "bins" &&

        #for $file in $bins_dir:
            #set $identifier = re.sub("[^\s\w\-]", "_", str($file.element_identifier))
            ln -s "$file" "bins/${identifier}.$file.ext" &&
        #end for

        kMetaShot_classifier_NV.py -b bins -o output
            -r "$reference.fields.path"
            -p "\${GALAXY_SLOTS:-1}"
            -a ${ass2ref}

        ]]>
    </command>
    <inputs>
        <param argument="--bins_dir" type="data" multiple="true" format="fasta,fasta.gz" label="Bin(s)/MAG(s) fasta file"/>
        <param argument="--reference" type="select" label="Select reference">
            <options from_data_table="kmetashot">
                <filter type="sort_by" column="2"/>
            </options>
            <validator type="no_options" message="No reference data for kMetaShot is installed. Please contact the Galaxy administrators to request one be installed."/>
        </param>
        <param argument="--ass2ref" type="float" min="0.0" value="0.0" max="1.0" label="Set ass2ref parameter" help="Set the number of non redundant minimizers found in classified MAG for classified strain"/>
    </inputs>
    <outputs>
        <!-- Every *.csv written to ./output becomes one element of the collection. -->
        <collection name="result" type="list">
            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.csv" format="csv" directory="output"/>
        </collection>
    </outputs>
    <tests>
        <!-- kMetaShot needs its reference data to run, and none is available in the test environment. These tests therefore only check the generated command line; the job itself is expected to fail. -->
        <test expect_exit_code="1" expect_failure="true">
            <param name="bins_dir" value="all_contig.fasta.gz" ftype="fasta.gz"/>
            <param name="ass2ref" value="0.2"/>
            <assert_command>
                <has_text text="kMetaShot_classifier_NV.py -b bins"/>
                <has_text text="-o output"/>
                <has_text text="-a 0.2"/>
            </assert_command>
        </test>
        <test expect_exit_code="1" expect_failure="true">
            <param name="bins_dir" value="all_contig.fasta.gz" ftype="fasta.gz"/>
            <param name="ass2ref" value="0.3"/>
            <assert_command>
                <has_text text="kMetaShot_classifier_NV.py -b bins"/>
                <has_text text="-o output"/>
                <has_text text="-a 0.3"/>
            </assert_command>
        </test>
    </tests>
    <help>
<![CDATA[

To learn more about the inside of the tool you can visit the `kMetaShot GitHub page <https://github.com/gdefazio/kMetaShot>`_!

**Input**

Fasta file(s) in fasta and/or fasta.gz format (.fa, .fasta, .fna, .fa.gz, .fasta.gz, .fna.gz are allowed extensions)

**Reference**

The reference data needed for this tool must be installed by a Galaxy administrator using the kMetaShot data manager. Select the installed reference release to classify against.

**Output**

The output is a collection of CSV file(s) containing the classification of the input bin(s)/MAG(s).

In the output file(s) each line has the following structure: num,bin,ass2ref,taxid,species,genus,family,order,class,phylum,superkingdom,organism_name

This means that for each bin the tool can classify, all (NCBI) ids are written in this order together with the organism name.

]]>
    </help>
    <citations>
        <citation type="doi">10.1093/bib/bbae680</citation>
    </citations>
</tool>
Binary file added tools/kmetashot/test-data/all_contig.fasta.gz
Binary file not shown.