- 02 Dec, 2021 1 commit
-
-
Leonard Herault authored
-
- 29 Nov, 2021 7 commits
-
-
Adrien Mazuel authored
-
Adrien Mazuel authored
-
Adrien Mazuel authored
-
Adrien Mazuel authored
-
Adrien Mazuel authored
-
Adrien Mazuel authored
-
Adrien Mazuel authored
import re # Get samples and ages from config file samples = config["samples"] samples = samples.split(",") print(samples) SMPATAC = config["samplesATAC"] SMPATAC = SMPATAC.split(",") print(SMPATAC) ages = config["ages"] ages = ages.split(",") print(ages) # Get conda env file from config file seurat_monocle_env = config["seurat_monocle_env"] pyscenic_env = config["pyscenic_env"] signacSeurat4_env = config["signacSeurat4_env"] streamAnalysis_env = config["streamAnalysis_env"] feather_database = config["scenic"]["cis_target_url"].split("/")[-1] print(feather_database) # Define base directory baseDir = config["dir"]["base"] os.chdir(baseDir) print(samples) scenicRuns = list(range(50)) rule all: input: #"report/final_report.html", "output/RNA/Seurat4_integration/combined.rds", #"output/RNA/MonocleIntegration/monocleWithCounts.rds", #"output/RNA/regulonAnalysis/mainRegulonTable.tsv", #"output/RNA/regulonAnalysis/clusterMarkerRegulonTable.tsv", "output/ATAC/integration/atacCombined.rds", #"output/TransferRnaAtac/coembedAll.rds", #"output/motifsEnv_installed", #"output/TransferRnaAtac/SignacMotif/atac_motif.rds", #expand("output/ATAC/smp/{smpAtac}/SignacMotif/atac_motif.rds", smpAtac = SMPATAC), "output/RNA/regulonAnalysis/mainRegulonTable.tsv", #"output/RNA/regulonAnalysis/clusterMarkerRegulonTable.tsv", # "output/Nestorowa/seuratForCastle.rds" #"output/signacSeurat4_installed_v2", #"output/TransferRnaAtac/coembedAll.rds", #"output/motifsEnv_installed_v2", #"output/TransferRnaAtac/SignacMotif/atac_motif.rds", #"output/TransferRnaAtac/SignacMotif_2/atac_motif.rds" #"output/RNA/regulonAnalysis/seuratAUC.rds", "output/ATAC/integration_v2/atac_merged_integrated.rds" rule untar: input: "input/RAW/{smp}/barcodes.tsv.gz", "input/RAW/{smp}/features.tsv.gz", "input/RAW/{smp}/matrix.mtx.gz", output:"input/RAW/{smp}/barcodes.tsv", "input/RAW/{smp}/features.tsv", "input/RAW/{smp}/matrix.mtx", shell: "cd input/RAW/{wildcards.smp};gunzip *.gz" rule install_seurat4: conda: signacSeurat4_env output: "output/signacSeurat4_installed" shell: "Rscript R_src/installSignacSeurat4.R;touch {output}" rule load_10X_data: input: "input/RAW/{smp}/barcodes.tsv", "input/RAW/{smp}/features.tsv", "input/RAW/{smp}/matrix.mtx", "output/signacSeurat4_installed" output: "input/10X_data_{smp}.rds" params: age = lambda wildcards : config[wildcards.smp]["age"], runDate = lambda wildcards : config[wildcards.smp]["date"], cellranger = config["cellrangerVersion"], condition = lambda wildcards : config[wildcards.smp]["condition"] threads: 1 conda: signacSeurat4_env shell: "Rscript R_src/load_10X_Seurat.R -c {params.cellranger} -b input/RAW/{wildcards.smp} -f {output} \ -s age={params.age},runDate={params.runDate},sampleName={wildcards.smp},condition={params.condition}" rule prepare_data_for_seurat: input: data = "input/10X_data_{smp}.rds" output: "output/RNA/{smp}/dataPreparation/seurat_treated.rds" threads: 1 conda: seurat_monocle_env shell: "Rscript R_src/prepareSeuratCL.R -i {input.data} -o output/RNA/{wildcards.smp}/dataPreparation -m 0.1 -u 2 -l 3 -m 5" rule download_signatures: params: geiger = config["publicSignatures"]["geiger_url"], chambers = config["publicSignatures"]["chambers_url"], output: geiger = "publicData/geiger_signatures.xls", chambers = "publicData/chambers_signatures.xls", shell: """wget -cO - {params.geiger} > {output.geiger}; wget -cO - {params.chambers} > {output.chambers}""" rule get_signatures: input: geiger = "publicData/geiger_signatures.xls", chambers = "publicData/chambers_signatures.xls", output: "output/signatures/publicSignatures.rds" threads: 12 conda: signacSeurat4_env shell : "Rscript R_src/getSignaturesCL.R -t {input.geiger} -m {input.chambers} -o output/signatures/" rule seurat4: input: dataPrepared = "output/RNA/{smp}/dataPreparation/seurat_treated.rds", sig = "output/signatures/publicSignatures.rds", output: "output/RNA/{smp}/Seurat4/seurat.rds" conda: signacSeurat4_env params: num_dim = config["seurat3"]["dim"], cr = config["seurat3"]["correction"], minPropCell = config["seurat3"]["minPropCellExp"], res = config["seurat3"]["resolution"] threads: 1 shell: "Rscript R_src/Seurat4CL.R -i {input.dataPrepared} \ -o output/RNA/{wildcards.smp}/Seurat4 -n {params.num_dim} \ -b {params.cr} -s {input.sig}\ -r {params.res} -z logNorm" rule seurat4_integration: input: expand("output/RNA/{smp}/Seurat4/seurat.rds",smp=samples) output: "output/RNA/Seurat4_integration/combined.rds" conda: signacSeurat4_env params: cr = config["seurat_integration"]["correction"], num_dim_smp = config["seurat_integration"]["dim"], num_dim_int = config["seurat_integration"]["num_dim_int"], smpList = config["seurat_integration"]["smp_list"], res = config["seurat_integration"]["resolution"] threads: 1 shell: "Rscript R_src/Seurat4_integration.R -i {params.smpList} -n {params.num_dim_smp} -N {params.num_dim_int} -o output/RNA/Seurat4_integration -r {params.res} -b {params.cr} -z logNorm" rule seurat4_analysis: input: seurat = "output/RNA/Seurat4_integration/combined.rds" output: "output/RNA/Seurat4_integration/Analysis/seurat_annotated.rds" conda: signacSeurat4_env params: res = config["seurat_analysis"]["resolution"], logFC = config["seurat_analysis"]["logfc_threshold"], padj = config["seurat_analysis"]["pval_adj"], assay = config["seurat_analysis"]["assay"], clusters = config["seurat_analysis"]["clusters"], signatures = config["seurat_analysis"]["signatures"] threads: 1 shell: "Rscript R_src/seuratAnalysisCL.R -i {input.seurat} -o output/RNA/Seurat4_integration/Analysis/ \ -r {params.res} -s {params.signatures} -c {params.clusters} \ -f {params.logFC} -p {params.padj} -a {params.assay}" rule prepare_STREAM_analysis: input: seurat = "output/RNA/Seurat4_integration/Analysis/seurat_annotated.rds" output: "output/RNA/prepareSTREAM/All_colorCellCyclePhases.tsv" conda: signacSeurat4_env params: prefix = config["prepare_STREAM"]["prefix"] threads: 1 shell: "Rscript R_src/prepareSTREAManalysisCL.R -i {input.seurat} -o output/RNA/prepareSTREAM/ -p {params.prefix}" rule run_STREAM_analysis: input: matrix = "output/RNA/streamFiltering/All_cells_scaleDataForStream.tsv" output: "output/RNA/streamAnalysis/stream_metadata.csv" conda: streamAnalysis_env params: cells = config["STREAM_analysis"]["cells_clusters"], colors = config["STREAM_analysis"]["colors_clusters"], epg_alpha = config["STREAM_analysis"]["epg_alpha"], epg_mu = config["STREAM_analysis"]["epg_mu"], epg_lambda = config["STREAM_analysis"]["epg_lambda"] threads: 1 shell: "python py_src/streamPseudotime.py -i {input.matrix} -o output/RNA/streamAnalysis/ \ -c {params.cells} -v {params.colors} -a {params.epg_alpha} -m {params.epg_mu} -l {params.epg_lambda}" rule seurat_STREAM: input: stream = "output/RNA/streamAnalysis/stream_metadata.csv", seurat = "output/RNA/Seurat4_integration/Analysis/seurat_annotated.rds" output: "output/RNA/streamAnalysisSeurat/seuratPseudotime.rds" conda: signacSeurat4_env params: metaData = config["seurat_STREAM"]["metaData"], refPseudotime = config["seurat_STREAM"]["refPseudotime"], treeShape = config["seurat_STREAM"]["treeShape"] threads: 1 shell: "Rscript R_src/streamSeuratCL.R -i {input.seurat} -s {input.stream} -o output/RNA/streamAnalysisSeurat/ \ -m {params.metaData} -p {params.refPseudotime} -t {params.treeShape}" rule prepare_nestorowa_data: input: "input/Nestorowa/GSE81682_HTSeq_counts.txt","input/Nestorowa/all_cell_types.txt" output: "output/Nestorowa/seuratForActinn.rds" conda: signacSeurat4_env shell: "Rscript R_src/prepareNestorowaCL.R -i {input[0]} -m {input[1]} -o output/Nestorowa" ####################### Scenic part of the workflow #################################### rule get_databases_for_scenic: output: feather="publicData/database/mm9-tss-centered-10kb-7species.mc9nr.feather", tbl="publicData/database/motifs-v9-nr.mgi-m0.001-o0.0.tbl" params: feather=config["scenic"]["cis_target_url"], tbl=config["scenic"]["motif_url"], sums=config["scenic"]["sums"], feather_name=feather_database shell: """cd publicData/database; \ wget {params.feather}; \ wget {params.tbl}; \ wget {params.sums}; \ awk -v feather_database={params.feather_name} '$2 == feather_database' sha256sum.txt | sha256sum -c -""" rule prepare_tf_list: input: "publicData/database/motifs-v9-nr.mgi-m0.001-o0.0.tbl" output: "output/publicData/mm_mgi_tfs.txt" conda: pyscenic_env shell: "python py_src/getTfList.py -i {input} -o {output}" rule filter_genes_for_scenic_all: input: "output/RNA/Seurat4_integration/combined.rds", "output/publicData/mm_mgi_tfs.txt" output: "output/RNA/ScenicAll/expressionRawCountFilteredForScenic.tsv", conda: signacSeurat4_env shell: "Rscript R_src/filterSeuratForScenicCL.R -i {input[0]} -t {input[1]} -o output/RNA/ScenicAll/" rule split_by_condition: input: "output/RNA/ScenicAll/expressionRawCountFilteredForScenic.tsv", output: "output/RNA/ScenicRNA_multipleRuns_{smp}/expressionRawCountFilteredForScenic.tsv" conda: pyscenic_env shell: "head -1 {input} > {output}; cat {input} | grep '^{wildcards.smp}' >> {output}" rule multiple_grnboost_rna: input: "output/RNA/ScenicAll/expressionRawCountFilteredForScenic.tsv", "output/publicData/mm_mgi_tfs.txt" output: "output/RNA/ScenicRNA_multipleRuns/GRNboost/run{run}_GRNboost.tsv" threads: 20 conda: pyscenic_env shell: "pyscenic grn --seed {wildcards.run} --num_workers {threads} {input[0]} {input[1]} -o {output}" rule multiple_grnboost_rna_by_condition: input: "output/RNA/ScenicRNA_multipleRuns_{smp}/expressionRawCountFilteredForScenic.tsv", "output/publicData/mm_mgi_tfs.txt" output: "output/RNA/ScenicRNA_multipleRuns_{smp}/GRNboost/run{run}_GRNboost.tsv" threads: 20 conda: pyscenic_env shell: "pyscenic grn --seed {wildcards.run} --num_workers {threads} {input[0]} {input[1]} -o {output}" rule multiple_prune_modules_rna_maskDropouts_with_cis_target_csv: input: exp="output/RNA/ScenicAll/expressionRawCountFilteredForScenic.tsv", GRN_boost= "output/RNA/ScenicRNA_multipleRuns/GRNboost/run{run}_GRNboost.tsv", TF_motif="publicData/database/motifs-v9-nr.mgi-m0.001-o0.0.tbl", cis_motif="publicData/database/mm9-tss-centered-10kb-7species.mc9nr.feather" output:"output/RNA/ScenicRNA_multipleRuns/cis_target_maskDropouts/run{run}_regulons.csv" threads:10 conda:pyscenic_env shell: "pyscenic ctx --annotations_fname {input.TF_motif} -a --mask_dropouts\ --num_workers {threads} \ --expression_mtx_fname {input.exp} -o {output} \ {input.GRN_boost} {input.cis_motif}" rule multiple_prune_modules_rna_maskDropouts_with_cis_target_csv_by_condition: input: exp="output/RNA/ScenicRNA_multipleRuns_{smp}/expressionRawCountFilteredForScenic.tsv", GRN_boost="output/RNA/ScenicRNA_multipleRuns_{smp}/GRNboost/run{run}_GRNboost.tsv", TF_motif="publicData/database/motifs-v9-nr.mgi-m0.001-o0.0.tbl", cis_motif="publicData/database/mm9-tss-centered-10kb-7species.mc9nr.feather" output:"output/RNA/ScenicRNA_multipleRuns_{smp}/cis_target_maskDropouts/run{run}_regulons.csv" threads:12 conda:pyscenic_env shell: "pyscenic ctx --annotations_fname {input.TF_motif} -a --mask_dropouts\ --num_workers {threads} \ --expression_mtx_fname {input.exp} -o {output} \ {input.GRN_boost} {input.cis_motif}" rule multiple_motifEnriched2regulons_rna_maskDropouts: input: "output/RNA/ScenicRNA_multipleRuns/cis_target_maskDropouts/run{run}_regulons.csv" output: "output/RNA/ScenicRNA_multipleRuns/cis_target_maskDropouts/run{run}_regulons.json" conda:pyscenic_env shell: "python py_src/motif2regulon.py -i {input} -o {output}" rule multiple_motifEnriched2regulons_rna_maskDropouts_by_condition: input: "output/RNA/ScenicRNA_multipleRuns_{smp}/cis_target_maskDropouts/run{run}_regulons.csv" output: "output/RNA/ScenicRNA_multipleRuns_{smp}/cis_target_maskDropouts/run{run}_regulons.json" conda:pyscenic_env shell: "python py_src/motif2regulon.py -i {input} -o {output}" rule merging_multiple_runs: input: files = expand("output/RNA/ScenicRNA_multipleRuns/cis_target_maskDropouts/run{run}_regulons.json",run = scenicRuns), adjacencies = expand("output/RNA/ScenicRNA_multipleRuns/GRNboost/run{run}_GRNboost.tsv",run = scenicRuns) output: "output/RNA/ScenicRNA_multipleRuns/cis_target_maskDropouts/aggregatedRegulons.json", "output/RNA/ScenicRNA_multipleRuns/cis_target_maskDropouts/aggregatedRegulonsMeta.json", "output/RNA/ScenicRNA_multipleRuns/cis_target_maskDropouts/adja.sqlite" shell: "python py_src/aggregateMultiRun.py -r output/RNA/ScenicRNA_multipleRuns/cis_target_maskDropouts/ -c 0.8 -a output/RNA/ScenicRNA_multipleRuns/GRNboost/ -o output/RNA/ScenicRNA_multipleRuns/cis_target_maskDropouts/" rule merging_multiple_runs_by_condition: input: files = expand("output/RNA/ScenicRNA_multipleRuns_{smp}/cis_target_maskDropouts/run{run}_regulons.json",run = scenicRuns,smp =samples), adjacencies = expand("output/RNA/ScenicRNA_multipleRuns_{smp}/GRNboost/run{run}_GRNboost.tsv",run = scenicRuns,smp =samples), output: "output/RNA/ScenicRNA_multipleRuns_{smp}/cis_target_maskDropouts/aggregatedRegulons.json", "output/RNA/ScenicRNA_multipleRuns_{smp}/cis_target_maskDropouts/aggregatedRegulonsMeta.json", "output/RNA/ScenicRNA_multipleRuns_{smp}/cis_target_maskDropouts/adja.sqlite" shell: "python py_src/aggregateMultiRun.py -r output/RNA/ScenicRNA_multipleRuns_{wildcards.smp}/cis_target_maskDropouts/\ -a output/RNA/ScenicRNA_multipleRuns_{wildcards.smp}/GRNboost/ -c 0.8 \ -o output/RNA/ScenicRNA_multipleRuns_{wildcards.smp}/cis_target_maskDropouts/" rule multiple_aucell_TF_seurat_regulon_rna_maskDropouts: input:"output/RNA/ScenicAll/expressionRawCountFilteredForScenic.tsv", "output/RNA/ScenicRNA_multipleRuns/cis_target_maskDropouts/aggregatedRegulons.json" output:"output/RNA/ScenicRNA_multipleRuns/AUCell_maskDropouts/regulons_enrichment.csv" threads:12 conda:pyscenic_env shell: "python3.6 py_src/aucellJson.py -c {threads} -s 2020 -e {input[0]} -r {input[1]} -o output/RNA/ScenicRNA_multipleRuns/AUCell_maskDropouts/" rule creating_regulonTable: input: main = "output/RNA/ScenicRNA_multipleRuns/cis_target_maskDropouts/aggregatedRegulonsMeta.json", supp = expand("output/RNA/ScenicRNA_multipleRuns_{smp}/cis_target_maskDropouts/aggregatedRegulonsMeta.json",smp=samples) output: "output/RNA/regulonAnalysis/mainRegulonTable.tsv" params: condName = "RA+CT", tf_database = "output/publicData/mm_mgi_tfs.txt", supp = "+".join(expand("output/RNA/ScenicRNA_multipleRuns_{smp}/cis_target_maskDropouts/aggregatedRegulonsMeta.json",smp=samples)) conda: seurat_monocle_env shell: "Rscript R_src/makeRegulonTableCL.R -o output/RNA/regulonAnalysis/ -t {params.tf_database} -r {input.main} -s {params.supp} -n {params.condName}" rule analyseRegulonScore: input: seurat = "report/seurat_promyelocytes.rds", tfList = "input/selectedTF.txt", regulonScore = "output/RNA/ScenicRNA_multipleRuns/AUCell_maskDropouts/regulons_enrichment.csv" output: "output/RNA/regulonAnalysis/clusterMarkerRegulonTable.tsv" params: scoreDiff = config["regulonAnalysis"]["scoreDiff"] conda: seurat_monocle_env shell: "Rscript R_src/regulonMarkerCL.R -i {input.seurat} -t {input.tfList} \ -r {input.regulonScore} -o output/RNA/regulonAnalysis/ -c -a condition" ####################### ATAC seq part of the workflow #################################### rule process_atac_smp: input:"input/RAW_atac/{smpAtac}/filtered_peak_bc_matrix.h5", "input/RAW_atac/{smpAtac}/singlecell.csv", "input/RAW_atac/{smpAtac}/fragments.tsv.gz", "output/signacSeurat4_installed" output:"output/ATAC/smp/{smpAtac}/atac.rds" params: age = lambda wildcards : config[wildcards.smpAtac]["age"], runDate = lambda wildcards : config[wildcards.smpAtac]["date"], condition = lambda wildcards : config[wildcards.smpAtac]["condition"] threads:12 conda: signacSeurat4_env shell: "Rscript R_src/signacSmpCL.R -i input/RAW_atac/{wildcards.smpAtac}/ -o output/ATAC/smp/{wildcards.smpAtac}/ \ -u 2 -l 2 -t 2 -c 2 -r 30 -b 0.05 \ -s age={params.age},runDate={params.runDate},sampleName={wildcards.smpAtac},condition={params.condition}" rule integrate_atac: input: expand("output/ATAC/smp/{smpAtac}/atac.rds",smpAtac = SMPATAC), "output/signacSeurat4_installed" output:"output/ATAC/integration/atacCombined.rds" params: smpList = config["signac_integration"]["smp_list"], minCutOff = config["signac_integration"]["minCutOff"], subsetInterPeaks = config["signac_integration"]["subsetInterPeaks"] conda: signacSeurat4_env shell: "Rscript R_src/signac_integration.R -i {params.smpList} -o output/ATAC/integration/ -m {params.minCutOff} -s {params.subsetInterPeaks}" rule transfer_rna_atac: input: "output/RNA/Seurat4_integration/combined.rds", expand("output/ATAC/smp/{smpAtac}/atac.rds",smpAtac = SMPATAC), "output/signacSeurat4_installed_v2" output:"output/TransferRnaAtac/coembedAll.rds" conda: signacSeurat4_env shell: "Rscript R_src/transferRnaAtacCL.R -i {input[0]} -a {input[1]} -b {input[2]} -d -N 50 -r integrated_snn_res.0.3 -o output/TransferRnaAtac/" rule install_signac_motif_packages: conda: signacSeurat4_env output: "output/motifsEnv_installed" shell: "Rscript R_src/installMotifsEnv.R;touch {output}" rule signac_motif_integrated: input: "output/TransferRnaAtac/atac_integrated.rds" output: "output/TransferRnaAtac/SignacMotif/atac_motif.rds" params: clustName = config["signac_motif_integrated"]["cluster_name"], clustLevel = config["signac_motif_integrated"]["cluster_level"], clustCol = config["signac_motif_integrated"]["cluster_col"] conda: signacSeurat4_env shell: "Rscript R_src/SignacMotifAnnotCL.R -i {input} -o output/TransferRnaAtac/SignacMotif -s 9606 -a peaks -c {params.clustCol} -n {params.clustName} -l {params.clustLevel} -I TRUE" rule seurat_signac_annotAUC_enhancer_promoter: input: "output/TransferRnaAtac/SignacMotif_2/atac_motif.rds", "output/RNA/Seurat4_integration/Report_20210412/annotated_update.rds", "output/RNA/ScenicRNA_multipleRuns/AUCell_maskDropouts/regulons_enrichment.csv" output: "output/RNA/regulonAnalysis/seuratAUC.rds" conda: signacSeurat4_env shell: "Rscript R_src/prepareAUCseuratEnhancePromoseuratCL.R -i {input[1]} -a {input[0]} -r {input[2]} -o output/"
-
- 25 Nov, 2021 7 commits
-
-
Leonard Herault authored
-
Leonard Herault authored
-
Leonard Herault authored
-
Leonard Herault authored
-
Leonard Herault authored
-
Leonard Herault authored
-
Leonard Herault authored
-