Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions client/src/components/Collections/pairing.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import type { GenericPair } from "@/components/Collections/common/buildCollectio
/** Two name markers that together identify the halves of a paired dataset. */
type FilterPair = [string, string];

/**
 * Well-known naming conventions for paired datasets, keyed by filter name.
 *
 * Each value holds two name markers; in the auto-pairing spec the first
 * marker (e.g. `_1`, `_F`) labels the forward element and the second
 * (e.g. `_2`, `_R`) the reverse element.
 *
 * Note that the markers overlap as substrings: `_R` is contained in
 * `_R1`/`_R2`, so code that probes names for these markers has to test
 * the `Rs` markers before the `Fs` markers.
 */
export const COMMON_FILTERS = {
    illumina: ["_1", "_2"] as FilterPair,
    Rs: ["_R1", "_R2"] as FilterPair,
    Fs: ["_F", "_R"] as FilterPair,
    dot12s: [".1.fastq", ".2.fastq"] as FilterPair,
};

/** Union of the filter names defined in `COMMON_FILTERS`. */
export type CommonFiltersType = keyof typeof COMMON_FILTERS;
Expand All @@ -16,26 +17,31 @@ export function guessInitialFilterType(elements: HasName[]): CommonFiltersType |
let illumina = 0;
let dot12s = 0;
let Rs = 0;
let Fs = 0;

// Should we limit the forEach? What if there are thousands of elements?
elements.forEach((element) => {
if (element.name?.includes(".1.fastq") || element.name?.includes(".2.fastq")) {
dot12s++;
} else if (element.name?.includes("_R1") || element.name?.includes("_R2")) {
Rs++;
} else if (element.name?.includes("_F") || element.name?.includes("_R")) {
Fs++;
} else if (element.name?.includes("_1") || element.name?.includes("_2")) {
illumina++;
}
});
// if we cannot filter don't set an initial filter and hide all the data
if (illumina === 0 && dot12s === 0 && Rs === 0) {
if (illumina === 0 && dot12s === 0 && Rs === 0 && Fs === 0) {
return null;
} else if (illumina > dot12s && illumina > Rs) {
} else if (illumina > dot12s && illumina > Rs && illumina > Fs) {
return "illumina";
} else if (dot12s > illumina && dot12s > Rs) {
} else if (dot12s > illumina && dot12s > Rs && dot12s > Fs) {
return "dot12s";
} else if (Rs > illumina && Rs > dot12s) {
} else if (Rs > illumina && Rs > dot12s && Rs > Fs) {
return "Rs";
} else if (Fs > illumina && Fs > dot12s && Fs > Rs) {
return "Fs";
} else {
return "illumina";
}
Expand Down
16 changes: 12 additions & 4 deletions lib/galaxy/model/dataset_collections/auto_pairing.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class HasName(Protocol):
# Well-known naming conventions for paired datasets, keyed by filter name.
# Each value is a pair of name markers; in the auto-pairing spec the first
# marker (e.g. "_1", "_F") labels the forward element and the second
# (e.g. "_2", "_R") the reverse element.
# The markers overlap as substrings ("_R" is contained in "_R1"/"_R2"), so
# code probing names for them must test the "Rs" markers before the "Fs" ones.
COMMON_FILTERS: dict[str, tuple[str, str]] = {
"illumina": ("_1", "_2"),
"Rs": ("_R1", "_R2"),
"Fs": ("_F", "_R"),
"dot12s": (".1.fastq", ".2.fastq"),
}

Expand Down Expand Up @@ -116,25 +117,32 @@ def guess_initial_filter_type(elements: list[T]) -> Optional[str]:
illumina = 0
dot12s = 0
Rs = 0
Fs = 0

# Iterate through elements and count occurrences of filter patterns
# Order matters: more specific patterns must be checked before less specific ones
# (_R1/_R2 before _F/_R since _R is a substring of _R1)
for element in elements:
if ".1.fastq" in element.name or ".2.fastq" in element.name:
dot12s += 1
elif "_R1" in element.name or "_R2" in element.name:
Rs += 1
elif "_F" in element.name or "_R" in element.name:
Fs += 1
elif "_1" in element.name or "_2" in element.name:
illumina += 1

# Determine the most likely filter type
if illumina == 0 and dot12s == 0 and Rs == 0:
if illumina == 0 and dot12s == 0 and Rs == 0 and Fs == 0:
return None
elif illumina > dot12s and illumina > Rs:
elif illumina > dot12s and illumina > Rs and illumina > Fs:
return "illumina"
elif dot12s > illumina and dot12s > Rs:
elif dot12s > illumina and dot12s > Rs and dot12s > Fs:
return "dot12s"
elif Rs > illumina and Rs > dot12s:
elif Rs > illumina and Rs > dot12s and Rs > Fs:
return "Rs"
elif Fs > illumina and Fs > dot12s and Fs > Rs:
return "Fs"
else:
return "illumina"

Expand Down
32 changes: 32 additions & 0 deletions lib/galaxy/model/dataset_collections/auto_pairing_spec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,35 @@
input:
forward: input_1.fastq.bz2
reverse: input_2.fastq.bz2

- doc: Simple _F/_R split.
inputs:
- input_F.fastq
- input_R.fastq
paired:
input:
forward: input_F.fastq
reverse: input_R.fastq

- doc: Compressed _F/_R split (.gz).
inputs:
- ERR042228_F.fq.gz
- ERR042228_R.fq.gz
paired:
ERR042228:
forward: ERR042228_F.fq.gz
reverse: ERR042228_R.fq.gz

- doc: Multiple _F/_R pairs with common prefixes.
inputs:
- ERR042228_F.fq.gz
- ERR042228_R.fq.gz
- ERR636028_F.fq.gz
- ERR636028_R.fq.gz
paired:
ERR042228:
forward: ERR042228_F.fq.gz
reverse: ERR042228_R.fq.gz
ERR636028:
forward: ERR636028_F.fq.gz
reverse: ERR636028_R.fq.gz
Loading