diff --git a/R_src/DoMultipleBarHeatmap.R b/R_src/DoMultipleBarHeatmap.R
new file mode 100644
index 0000000000000000000000000000000000000000..7a1fd93a301e0404fac2a3bcdaa726ff36d54b4a
--- /dev/null
+++ b/R_src/DoMultipleBarHeatmap.R
@@ -0,0 +1,179 @@
+# Adapted from https://github.com/satijalab/seurat/issues/2201
+DoMultiBarHeatmap <- function (object, 
+                               features = NULL, 
+                               cells = NULL, 
+                               group.by = "ident", 
+                               additional.group.by = NULL, 
+                               group.bar = TRUE, 
+                               disp.min = -2.5, 
+                               disp.max = NULL, 
+                               slot = "scale.data", 
+                               assay = NULL, 
+                               label = TRUE, 
+                               size = 5.5, 
+                               hjust = 0, 
+                               angle = 45, 
+                               raster = TRUE, 
+                               draw.lines = TRUE, 
+                               lines.width = NULL, 
+                               group.bar.height = 0.02, 
+                               combine = TRUE,
+                               group.col = NULL
+                               ) 
+{
+  cells <- cells %||% colnames(x = object)
+  if (is.numeric(x = cells)) {
+    cells <- colnames(x = object)[cells]
+  }
+  assay <- assay %||% DefaultAssay(object = object)
+  DefaultAssay(object = object) <- assay
+  features <- features %||% VariableFeatures(object = object)
+  ## Why reverse???
+  features <- rev(x = unique(x = features))
+  disp.max <- disp.max %||% ifelse(test = slot == "scale.data", 
+                                   yes = 2.5, no = 6)
+  possible.features <- rownames(x = GetAssayData(object = object, 
+                                                 slot = slot))
+  if (any(!features %in% possible.features)) {
+    bad.features <- features[!features %in% possible.features]
+    features <- features[features %in% possible.features]
+    if (length(x = features) == 0) {
+      stop("No requested features found in the ", slot, 
+           " slot for the ", assay, " assay.")
+    }
+    warning("The following features were omitted as they were not found in the ", 
+            slot, " slot for the ", assay, " assay: ", paste(bad.features, 
+                                                             collapse = ", "))
+  }
+  data <- as.data.frame(x = as.matrix(x = t(x = GetAssayData(object = object, 
+                                                             slot = slot)[features, cells, drop = FALSE])))
+  
+  object <- suppressMessages(expr = StashIdent(object = object, 
+                                               save.name = "ident"))
+    
+  group.by <- group.by %||% "ident"
+  groups.use <- object[[c(group.by, additional.group.by)]][cells, , drop = FALSE]
+  plots <- list()
+  for (i in group.by) {
+    data.group <- data
+    group.use <- groups.use[, c(i, additional.group.by), drop = FALSE]
+    
+    for(colname in colnames(group.use)){
+      if (!is.factor(x = group.use[[colname]])) {
+        group.use[[colname]] <- factor(x = group.use[[colname]])
+      }  
+    }
+    
+    if (draw.lines) {
+      lines.width <- lines.width %||% ceiling(x = nrow(x = data.group) * 
+                                                0.0025)
+      placeholder.cells <- sapply(X = 1:(length(x = levels(x = group.use[[i]])) * 
+                                           lines.width), FUN = function(x) {
+                                             return(Seurat:::RandomName(length = 20))
+                                           })
+      placeholder.groups <- data.frame(foo=rep(x = levels(x = group.use[[i]]), times = lines.width))
+      placeholder.groups[additional.group.by] = NA
+      colnames(placeholder.groups) <- colnames(group.use)
+      rownames(placeholder.groups) <- placeholder.cells
+      
+      group.levels <- levels(x = group.use[[i]])
+      
+      group.use <- sapply(group.use, as.vector)
+      rownames(x = group.use) <- cells
+      
+      group.use <- rbind(group.use, placeholder.groups)
+      
+      na.data.group <- matrix(data = NA, nrow = length(x = placeholder.cells), 
+                              ncol = ncol(x = data.group), dimnames = list(placeholder.cells, 
+                                                                           colnames(x = data.group)))
+      data.group <- rbind(data.group, na.data.group)
+    }
+    
+    
+    
+    group.use <- group.use[with(group.use, eval(parse(text=paste('order(', paste(c(i, additional.group.by), collapse=', '), ')', sep='')))), , drop=F]
+
+    
+    plot <- Seurat:::SingleRasterMap(data = data.group, raster = raster, 
+                                     disp.min = disp.min, disp.max = disp.max, feature.order = features, 
+                                     cell.order = rownames(x = group.use), group.by = group.use[[i]])
+    
+    if (group.bar) {
+      
+      if(is.null(group.col)) {
+        colPal <- scales::hue_pal()
+      } else {
+        colPal <- function(n) {
+          return(group.col[c(1:n)])
+        }
+      }
+      pbuild <- ggplot_build(plot = plot)
+      group.use2 <- group.use
+      cols <- list()
+      na.group <- Seurat:::RandomName(length = 20)
+      for (colname in rev(x = colnames(group.use2))){
+        if (colname == group.by){
+          colid = colname
+          colPal2 <- colPal
+        } else {
+          colid = colname
+          colPal2 <- function(n) {
+            return(rev(scales::hue_pal()(n)))
+          }
+        }
+        
+        if (draw.lines) {
+          levels(x = group.use2[[colname]]) <- c(levels(x = group.use2[[colname]]), na.group)  
+          group.use2[placeholder.cells, colname] <- na.group
+          cols[[colname]] <- c(colPal2(length(x = levels(x = group.use[[colname]]))), "#FFFFFF")
+        } else {
+          cols[[colname]] <- c(colPal2(length(x = levels(x = group.use[[colname]]))))
+        }
+        names(x = cols[[colname]]) <- levels(x = group.use2[[colname]])
+        y.range <- diff(x = pbuild$layout$panel_params[[1]]$y.range)
+        y.pos <- max(pbuild$layout$panel_params[[1]]$y.range) + y.range * 0.015
+        y.max <- y.pos + group.bar.height * y.range
+        pbuild$layout$panel_params[[1]]$y.range <- c(pbuild$layout$panel_params[[1]]$y.range[1], y.max)
+        #print(cols[[colname]][group.use2[[colname]]])
+        plot <- suppressMessages(plot + 
+                                   annotation_raster(raster = t(x = cols[[colname]][group.use2[[colname]]]),  xmin = -Inf, xmax = Inf, ymin = y.pos, ymax = y.max) + 
+                                   annotation_custom(grob = grid::textGrob(label = colid, hjust = 0, gp = grid::gpar(cex = 0.75)), ymin = mean(c(y.pos, y.max)), ymax = mean(c(y.pos, y.max)), xmin = Inf, xmax = Inf) +
+                                   coord_cartesian(ylim = c(0, y.max), clip = "off")) 
+        
+        #temp <- as.data.frame(cols[[colname]][levels(group.use[[colname]])])
+        #colnames(temp) <- 'color'
+        #temp$x <- temp$y <- 1
+        #temp[['name']] <- as.factor(rownames(temp))
+        
+        #temp <- ggplot(temp, aes(x=x, y=y, fill=name)) + geom_point(shape=21, size=5) + labs(fill=colname) + theme(legend.position = "bottom")
+        #legend <- get_legend(temp)
+        #multiplot(plot, legend, heights=3,1)
+        
+        if ((colname == group.by) && label) {
+          x.max <- max(pbuild$layout$panel_params[[1]]$x.range)
+          x.divs <- pbuild$layout$panel_params[[1]]$x.major
+          group.use$x <- x.divs
+          label.x.pos <- tapply(X = group.use$x, INDEX = group.use[[colname]],
+                                FUN = median) * x.max
+          label.x.pos <- data.frame(group = names(x = label.x.pos), 
+                                    label.x.pos)
+          label.x.pos$group <- str_split_fixed(label.x.pos$group,"_",2)[,2]
+          plot <- plot + geom_text(stat = "identity", 
+                                   data = label.x.pos, aes_string(label = "group", 
+                                                                  x = "label.x.pos"), y = y.max + y.max * 
+                                     0.03 * 0.5, angle = angle, hjust = hjust, 
+                                   size = size)
+          plot <- suppressMessages(plot + coord_cartesian(ylim = c(0, 
+                                                                   y.max + y.max * 0.002 * max(nchar(x = levels(x = group.use[[colname]]))) * 
+                                                                     size), clip = "off"))
+        }
+      }
+    }
+    plot <- plot + theme(line = element_blank())
+    plots[[i]] <- plot
+  }
+  if (combine) {
+    plots <- CombinePlots(plots = plots)
+  }
+  return(plots)
+}
diff --git a/R_src/Enrichment.R b/R_src/Enrichment.R
new file mode 100644
index 0000000000000000000000000000000000000000..b412636836161261ffa8e4d9ec08bc49fac8edf9
--- /dev/null
+++ b/R_src/Enrichment.R
@@ -0,0 +1,156 @@
+# function to make gene enrichment analyzis with gProfileR
+
+## function to perform gprofler enrcihment analysis on each gene cluster of 
+## a differentially expressed gene result (eg data frame of DE genes with a column cluster)
+getGeneClustGprofile <- function(deg_clust,
+                                 background,
+                                 organism = "mmusculus",
+                                 hier_filtering = "none",
+                                 ordered_query = F) {
+  
+  resEnrichTest <- lapply(split(as.vector(deg_clust$Gene),f= deg_clust$Cluster),gprofiler,
+                    organism = "mmusculus",
+                    custom_bg = background,
+                    ordered_query = ordered_query,
+                    hier_filtering = hier_filtering)
+  return(resEnrichTest)
+}
+
+
+## function to plot gprofiler results
+plotWriteResEnrich <- function(resEnrichTest,
+                          sources=c("BP","keg"),
+                          outdir = "./gprofiler",
+                          clusterLabel =names(resEnrichTest),
+                          colors = brewer.pal(length(resEnrichTest)+1, "Set1")) {
+  
+  dir.create(outdir,recursive = T,showWarnings = F)
+  
+  colors <- colors
+
+  for (cluster in clusterLabel) {
+    write.table(file= paste(outdir,"/gprofiler_table_clust_",cluster,".tsv", sep = ""),
+                            resEnrichTest[[cluster]],quote = F,sep = '\t',row.names = F)
+    for (s in sources) {
+      results <- resEnrichTest[[cluster]][which(resEnrichTest[[cluster]][,"domain"] == s),]
+      results <- results[order(results$p.value),]
+      results <- as.data.frame(results[,c("term.id","term.name","p.value")])
+      results$logPval <- -log(base = 10,x = results$p.value)
+      if(s == "tf") {
+        results$term.name <- paste(results$term.name,results$term.id)
+      }
+      
+      ## TO DO : if to much term plot only the first n 
+      
+      if (length(results$term.name) > 55) {
+        png(paste(outdir,"/gprofiler_top30_",cluster,"_",s,".png",sep =""),width = 600,height = 600)
+        gp <- ggplot(results[c(1:30),], aes(x=reorder(term.name, logPval), y=logPval)) +
+          geom_bar(stat='identity',fill = colors[which(names(resEnrichTest)==cluster)] ) +
+          coord_flip() +
+          xlab("term name") +
+          ylab("-log10(p value)") +
+          theme(text = element_text(size = 20))
+        print(gp)
+        
+        dev.off()
+        
+        png(paste(outdir,"/gprofiler_",cluster,"_",s,".png",sep =""),width = 1400,height = 1400)
+        
+      } else {
+        png(paste(outdir,"/gprofiler_",cluster,"_",s,".png",sep =""),width = 600,height = 600)
+      }
+      gp <- ggplot(results, aes(x=reorder(term.name, logPval), y=logPval)) +
+        geom_bar(stat='identity',fill = colors[which(names(resEnrichTest)==cluster)] ) +
+        coord_flip() +
+        xlab("term name") +
+        ylab("-log10(p value)") +
+        theme(text = element_text(size = 20))
+      print(gp)
+      
+      dev.off()
+    }
+    
+  }
+}
+
+## function to composate
+gProfileAnalysis <- function(deg_clust,
+                             background,
+                             organism = "mmusculus",
+                             hier_filtering = "moderate",
+                             ordered_query = F,
+                             sources=c("BP","keg","CC",'MF','tf'),
+                             outdir = "/gprofiler",
+                             clusterLabel =names(resEnrichTest),
+                             colors = brewer.pal(length(resEnrichTest)+1, "Set1")) {
+  
+  resEnrichTest <- getGeneClustGprofile(deg_clust = deg_clust,
+                                        background = background,
+                                        organism = organism,
+                                        hier_filtering = hier_filtering,
+                                        ordered_query = ordered_query)
+  
+  plotWriteResEnrich(resEnrichTest,
+                     sources=sources,
+                     outdir = outdir,
+                     clusterLabel =clusterLabel,
+                     colors = colors)
+  
+  return(resEnrichTest)
+  
+}
+
+# function for hypergeometric test
+
+# hypergeometric test https://dputhier.github.io/ASG/practicals/go_statistics_td/go_statistics_td_2015.html
+
+#DEPRECATED Seurat 2
+# testHyper <- function(gene_set, signature, background,seurat) {
+#     signature <- signature[which(is.element(signature,set = rownames(seurat@data)))]
+#     genesMarked <- gene_set[which(is.element(gene_set,set = signature))]
+#     p.value <-  phyper(q=length(genesMarked) -1, 
+#                        m=length(signature),                  
+#                        n=length(background) - length(signature), k= length(gene_set), lower.tail=FALSE)
+#     return(p.value)
+# }
+
+#For Seurat 3
+testHyper3 <- function(gene_set, signature, background,seurat) {
+  signature <- signature[which(is.element(signature,set = rownames(seurat)))]
+  genesMarked <- gene_set[which(is.element(gene_set,set = signature))]
+  p.value <-  phyper(q=length(genesMarked) -1, 
+                     m=length(signature),                 
+                     n=length(background) - length(signature), k= length(gene_set), lower.tail=FALSE)
+  return(p.value)
+}
+
+
+
+testHyperCells <- function(cells_pull, cells_setTarget, allCells) {
+  cellsMarked <- gene_set[which(is.element(gene_set,set = cells_set))]
+  p.value <-  phyper(q=length(cellsMarked) -1, 
+                     m=length(cells_setTarget),
+                     n=length(allCells) - length(cells_setTarget), k= length(cells_pull), lower.tail=FALSE)
+  return(p.value)
+}
+
+## DEPRECATED Seurat2
+# testHyperSig <- function(signature,seurat,markers,clust) {  
+#   result <- testHyper(gene_set=markers[which(markers$Cluster == clust),"Gene"],
+#                       signature=signature,seurat=seurat,background=rownames(seurat@data))
+#   return(result)
+# }
+
+#For Seurat 3
+testHyperSig3 <- function(signature,seurat,markers,clust) { 
+  result <- testHyper3(gene_set=markers[which(markers$Cluster == clust),"Gene"],
+                       signature=signature,seurat=seurat,background=rownames(seurat))
+  return(result)
+}
+
+
+
+
+  
+
+
diff --git a/R_src/Seurat3CL.R b/R_src/Seurat3CL.R
new file mode 100644
index 0000000000000000000000000000000000000000..d4237602e4685ba99958404d14ba687ad316cedb
--- /dev/null
+++ b/R_src/Seurat3CL.R
@@ -0,0 +1,519 @@
+##------------------------------------------------------------------------------
+## L. Herault
+##------------------------------------------------------------------------------
+
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+
+suppressMessages(library(monocle))
+suppressMessages(library(Seurat))
+suppressMessages(library(BiocParallel))
+suppressMessages(library(getopt))
+suppressMessages(library(gridExtra))
+suppressMessages(library(gProfileR))
+suppressMessages(library(RColorBrewer))
+suppressMessages(library(readxl))
+suppressMessages(library(stringr))
+suppressMessages(library(scales))
+suppressMessages(library(grid))
+
+
+
+source("R_src/getSigPerCells.R")
+source("R_src/Enrichment.R")
+source("R_src/funForSeurat.R")
+
+# Seurat 3 analysis of an individual sample required for using CaSTLe script.
+
+## -----------------------------------------------------------------------------
+## Command line args
+## -----------------------------------------------------------------------------
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'inputRDS',  'i', 1, "character", "REQUIRED : 10X data prepared as monocle or seurat object (.RDS generated by prepare_data.R).",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)',
+  "num_dim",      'n',1,"numeric", "Number of dimension to use for ordering (eg n first pc of PCA on input data) 10 by default",
+  "correction",      "b",1,"character", "Covariable to use as blocking factor (eg one or several columns of pData: betch, cell cycle phases... separated by +)",
+  "minPropCellExp", "p",1,"numeric", "minimal proportion of cells that expressed the genes kept for the analaysis 0.001 by default",
+  "norm_method",    "z",1, "character", "normalisation method, logNorm seurat (by default) or sctransform",
+  "resolution", "r", 1,"numeric", "resolution for Seurat clustering 0.9 by default",
+  "signaturesFile", "s", 1, "character", "path to folder with signature list store as rds object",
+  "identRemoved",    "d", 1, "character", "Optionnal cluster to remove only work if input is a seurat object",
+  "nonExpressedGenesRemoved", "e", 0,"logical", "non expressed gene already removed default to FALSE",
+  "gprofiler", "g", 0, "logical", "if true doing gprofiler default to true",
+  "logfc_threshold", "l", 1, "numeric", "logfc threshold for finding cluster markers (1 cluster vs all deg) 0.25 by default",
+  "rodriguezSig", "k", 1, "character", "path for xls file of Rodriguez result"
+  
+), byrow=TRUE, ncol=5);
+
+opt = getopt(spec)
+
+
+# if help was asked, print a friendly message
+# and exit with a non-zero error code
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputRDS)) {
+  cat("For an individual sample, perform PCA, tSNE, UMAP, Louvain clustering, diff expression between clusters with Seurat3 package with filtered poor quality cells data (gbm_cds monocle object)")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+
+#set default arguments values
+
+if (is.null(opt$num_dim)) {
+  opt$num_dim <- 10
+}
+
+if (is.null(opt$resolution)) {
+  opt$resolution <- 0.8
+}
+
+if (is.null(opt$minPropCellExp)) {
+  opt$minPropCellExp <- 0.001
+  print(opt$minPropCellExp)
+}
+
+
+if (is.null(opt$logfc_threshold)) {
+  opt$logfc_threshold = 0.25
+} 
+
+print(paste("logfc threshold:", opt$logfc_threshold) )
+
+if (is.null(opt$outdir)) {
+  opt$outdir = "./"
+}
+
+
+if (is.null(opt$nonExpressedGenesRemoved)) {
+  opt$nonExpressedGenesRemoved = F
+} else {
+  opt$nonExpressedGenesRemoved = T
+}
+
+if (is.null(opt$gprofiler)) {
+  opt$gprofiler <- T
+} else {
+  opt$gprofiler <- F
+}
+
+print(opt$gprofiler)
+print(opt$nonExpressedGenesRemoved)
+
+
+# get correction vector
+if (!is.null(opt$correction)) {
+  corrections <- strsplit(x = opt$correction,split = "\\+")[[1]]
+} else {
+  corrections <- NULL
+}
+
+# create blank for the plots
+blank <- grid.rect(gp=gpar(col="white"))
+
+dir.create(opt$outdir,recursive = T,showWarnings = F)
+
+object <- readRDS(opt$inputRDS)
+
+# input is a monocle object in the workflow
+# Code to use this script with seurat input (eg to re analyse)
+if (is(object)[1]== "CellDataSet") {
+  
+  print("input monocle")
+  gbm_cds <- readRDS(opt$inputRDS)
+  
+} else {
+  if(is(object) == "Seurat") {
+    #It is easier to pass by a monocle object and then reget the seurat object with the genes filtered
+    print("input seurat object")
+    seurat <- object
+    
+    if (!is.null(opt$identRemoved)) {
+      print(unique(seurat@active.ident))
+      print(paste("removing identities",opt$identRemoved))
+      seurat <- SubsetData(seurat,ident.remove = strsplit(x = opt$identRemoved,split = ",")[[1]],subset.raw = T) # may not work with seurat3
+    }
+    
+    
+    pd <- new("AnnotatedDataFrame", data = gbm_cds@meta.data)
+    fd <- new("AnnotatedDataFrame", data = data.frame(gene_short_name = rownames(gbm_cds))) 
+    rownames(fd) <- fd$gene_short_name
+    
+    gbm_cds <- newCellDataSet(GetAssayData(gbm_cds,slot = "counts"),
+                              phenoData = pd,
+                              featureData = fd,
+                              lowerDetectionLimit = 0.1,
+                              expressionFamily = negbinomial.size())
+    
+    gbm_cds <- detectGenes(gbm_cds, min_expr = 0.1)
+    
+    
+    
+  }
+}
+
+if(opt$nonExpressedGenesRemoved == F) {
+  print("remove non expressed genes (non expressed in at least X% of the cells X user option in monocle dp feature 5% in seurat tutorial 0,1%)")
+  fData(gbm_cds)$use_for_seurat <- fData(gbm_cds)$num_cells_expressed > opt$minPropCellExp * ncol(gbm_cds)
+  
+  gbm_to_seurat <- gbm_cds[fData(gbm_cds)$use_for_seurat==T,]
+} else {
+  gbm_to_seurat <- gbm_cds
+}
+
+if (is.element("Cluster",colnames(pData(gbm_to_seurat)))) {
+  colnames(pData(gbm_to_seurat))[which(colnames(pData(gbm_to_seurat))=="Cluster")] <- "Cluster_monocle"
+}
+
+##Convert to seurat3
+#check for dup genes
+
+# Only needed if ensemble id (it is the case in the workflow)
+
+dupGeneNames <- fData(gbm_to_seurat)[which((duplicated(featureData(gbm_to_seurat)$gene_short_name))),"gene_short_name"]
+
+if(length(dupGeneNames) == 0) {
+  rownames(gbm_to_seurat) <- fData(gbm_to_seurat)$gene_short_name
+  
+} else {
+  write.csv(fData(gbm_to_seurat)[which(is.element(fData(gbm_to_seurat)$gene_short_name,dupGeneNames)),],
+            paste(opt$outdir,"/dupGenesName.csv",sep = "")
+  )
+  print("Dup gene short names existing, making them unique...")
+  rownames(gbm_to_seurat) <- make.unique(fData(gbm_to_seurat)$gene_short_name, sep = "--")
+  
+}
+
+
+seurat <- CreateSeuratObject(counts = exprs(gbm_to_seurat), meta.data = pData(gbm_to_seurat))
+
+
+
+#################################################################################################
+####################################### Seurat Workflow #########################################
+#################################################################################################
+
+if (opt$norm_method == "sctransform") {
+  
+  seurat <- SCTransform(object = seurat, vars.to.regress = c("G2M_score","S_score","G1_score"))
+  
+} else {
+  
+  seurat <- NormalizeData(object = seurat)
+  seurat <- FindVariableFeatures(object = seurat,selection.method = "vst", nfeatures = 2000, verbose = T)
+  seurat <- ScaleData(object = seurat,vars.to.regress = corrections)
+  
+}
+
+seurat <- RunPCA(object = seurat)
+
+
+png(paste(opt$outdir,"/ElbowPlot.png",sep =""))
+ElbowPlot(object = seurat,ndims = 30)
+dev.off()
+
+
+print("Clustering...")
+
+seurat <- FindNeighbors(object = seurat,dims = c(1:opt$num_dim),k.param = 20)
+seurat <- FindClusters(object = seurat,resolution = c(0.5,0.6,0.7,0.8,0.9,1,1.2))
+
+print("Running TSNE...")
+
+seurat <- RunTSNE(seurat,dims = c(1:opt$num_dim))
+
+print("Running UMAP...")
+
+seurat <- RunUMAP(seurat,dims = c(1:opt$num_dim))
+
+print("UMAP ok")
+print(colnames(seurat@meta.data))
+
+colPrefix <- "RNA_snn_res."
+if(opt$norm_method == "sctransform") {
+  colPrefix <- "SCT_snn_res."
+}
+
+umapListRes <- list()
+tsneListRes <- list()
+for (r in c(0.6,0.8,1,1.2)) {
+    umapListRes[[as.character(r)]] <- DimPlot(seurat,
+                              reduction = "umap",
+                              label = T,
+                              group.by = paste(colPrefix,r,sep="")) + 
+      NoLegend() +
+      ggtitle(paste("res",r))
+    
+    tsneListRes[[as.character(r)]] <- DimPlot(seurat,
+                                reduction = "tsne",
+                                label = T,
+                                group.by = paste(colPrefix,r,sep="")) + 
+      NoLegend() +
+      ggtitle(paste("res",r))
+}
+
+png(paste(opt$outdir,"/tsne_different_res.png",sep = ""),height = 800,width = 800)
+grid.arrange(tsneListRes[[1]],tsneListRes[[2]],tsneListRes[[3]],tsneListRes[[4]])
+dev.off()
+
+png(paste(opt$outdir,"/umap_different_res.png",sep = ""),height = 800,width = 800)
+grid.arrange(umapListRes[[1]],umapListRes[[2]],umapListRes[[3]],umapListRes[[4]])
+dev.off()
+
+
+
+
+Idents(seurat) <- paste(colPrefix,opt$resolution,sep = "")
+seurat@meta.data$numclust <- seurat@meta.data[,paste(colPrefix,opt$resolution,sep = "")]
+
+
+#Check for unwanted source of variation
+pPhases <- DimPlot(seurat,group.by = "phases")
+if (!is.null(seurat@meta.data$predicted)) {
+  pPred <- DimPlot(seurat,group.by = "predicted")
+} else {
+  pPred <- blank
+}
+
+pUMI <- FeaturePlot(seurat, "Total_mRNAs")
+pMito <- FeaturePlot(seurat, "percentMito")
+
+png(paste(opt$outdir,"/umap_factors.png",sep = ""),height = 800,width = 800)
+grid.arrange(pPhases,pPred,pUMI,pMito)
+dev.off()
+
+#Check for unwanted source of variation
+pPhases <- DimPlot(seurat,group.by = "phases",reduction = "tsne")
+if (!is.null(seurat@meta.data$predicted)) {
+  pPred <- DimPlot(seurat,group.by = "predicted",reduction = "tsne")
+} else {
+  pPred <- blank
+}
+pUMI <- FeaturePlot(seurat, "Total_mRNAs",reduction = "tsne")
+pMito <- FeaturePlot(seurat, "percentMito",reduction = "tsne")
+
+png(paste(opt$outdir,"/tsne_factors.png",sep = ""),height = 800,width = 800)
+grid.arrange(pPhases,pPred,pUMI,pMito)
+dev.off()
+
+
+#Check for genes
+
+gene_list<- c("Pdzk1ip1","Mllt3",
+                     "Ctla2a","Cd27","Cd34",
+                     "Lig1","Hells","Tyms",
+                     "Notch2","Lst1",
+                     "Irf7","Stat1",
+                     "Pf4","Itga2b",
+                     "Klf1","Gata1",
+                     "Mpo","Cd48",
+                     "Fcer1a","Hdc",
+                     "Il7r","Thy1", # Ccr9 not found
+                     "Cdc20","Ccnb1","Racgap1",
+                     "Mzb1","Ly6d", "Trp53inp1",
+                     "Jun","Fos","Nr4a1")
+
+gene_list <- gene_list[which(is.element(set=rownames(seurat),el = gene_list))] 
+
+dir.create(paste(opt$outdir,"/genes/umap/",sep =""),recursive = T)
+dir.create(paste(opt$outdir,"/genes/tsne/",sep =""),recursive = T)
+for (g in gene_list) {
+  png(paste(opt$outdir,"/genes/umap/",g,".png",sep = ""))
+  plot(FeaturePlot(seurat,features = g))
+  dev.off()
+}
+
+for (g in gene_list) {
+  png(paste(opt$outdir,"/genes/tsne/",g,".png",sep = ""))
+  plot(FeaturePlot(seurat,features = g),reduction = "tsne")
+  dev.off()
+}
+
+markers <- FindAllMarkers(seurat,only.pos = T,logfc.threshold= opt$logfc_threshold)
+markers <- markers[which(markers$p_val_adj < 0.05),]
+
+write.table(x = markers,paste(opt$outdir,"/markers.tsv", sep =""),sep = "\t",quote = F,row.names = F,col.names = T)
+
+
+dir.create(paste(opt$outdir,"/markers/",sep = ""))
+
+ylab <- "LogNormalized UMI counts"
+if (opt$norm_method == "sctransform") {
+  ylab <- "Expression level"
+}
+
+for (numClust in unique(markers$cluster)) {
+  print(head(markers[which(markers$cluster == numClust),],n=9))
+  png(paste(opt$outdir,"/markers/Cluster_",numClust,"_topGenesVlnPlot.png",sep =""),width = 1000, height = 1000)
+  plot(VlnPlot(object = seurat, features = head(markers[which(markers$cluster == numClust),"gene"],n=9),pt.size = 0.5) + 
+         labs(x = "Clusters",y= ylab,colour = "black") +
+         theme(axis.text = element_text(size=20),
+               plot.title = element_text(size=25)) )
+  dev.off()
+  
+}
+
+#################################################################################################
+##########                                Enrichment                                   ##########
+#################################################################################################
+
+## Add signatures
+dir.create(paste(opt$outdir,"/cellSignatures/",sep = ""))
+signatures <- readRDS(opt$signaturesFile)
+
+
+for (sig in c(1:length(signatures))) {
+  sigName <- names(signatures)[sig]
+  signature <- signatures[[sig]]
+  seurat <- scoreCells3(seurat,signature,outdir= paste(opt$outdir,"/cellSignatures/",sep=""),sigName)
+}
+
+## Enrichment
+
+# Modify colnames of markers to use gProfileAnalysis function
+firstup <- function(x) {
+  substr(x, 1, 1) <- toupper(substr(x, 1, 1))
+  x
+}
+
+colnames(markers) <- firstup(colnames(markers))
+
+
+#be careful background
+if(opt$gprofiler) {
+gprofiler_result <- gProfileAnalysis(deg_clust = markers,
+                                     outdir = paste(opt$outdir,"/gProfileR", sep =""),
+                                     background = rownames(seurat),
+                                     colors = hue_pal()(length(unique(markers$Cluster))))
+
+saveRDS(gprofiler_result,file=paste(opt$outdir,"/gProfileR/gprofiler_results.rds",sep =""))
+}
+
+
+
+# Test Rodriguez cluster sig
+if(!is.null(opt$rodriguezSig)) {
+  
+  clusterNames <- c("C1","C2","C3","Mk","Er","Ba","Neu","Mo1","Mo2", "preDC","preB","preT")
+  
+  RodriguezClustersSig <- lapply(X= c(1:length(clusterNames)),FUN = read_xlsx,path = opt$rodriguezSig)
+  
+  names(RodriguezClustersSig) <- clusterNames 
+  
+  getOnlyPos <- function(clustersSig) {
+    clusterSig <- clustersSig[which(clustersSig$log.effect > 0),]
+    return(clusterSig)
+  }
+  
+  RodriguezClustersSigPos <- lapply(X= RodriguezClustersSig, getOnlyPos)
+  
+  
+  signaturesRodriguez <- lapply(RodriguezClustersSigPos,"[[",1 )
+  
+  
+  firstup <- function(x) {
+    substr(x, 1, 1) <- toupper(substr(x, 1, 1))
+    x
+  }
+  
+  colnames(markers) <- firstup(colnames(markers))
+  
+  getClustEnrichForRodriguez <- function(clust,signatures,seurat,markers) {
+    clustSig <- lapply(signatures,testHyperSig3,seurat,markers,clust)
+    if (!is.null(seurat@meta.data$predicted)) {
+    propCellTypesLearned <- table(seurat@meta.data[which(seurat@meta.data$numclust==clust),"predicted"])/length(seurat@meta.data[which(seurat@meta.data$numclust==clust),"predicted"])
+    } else{
+      propCellTypesLearned <- NULL
+    }
+    clustInfo <- c(clustSig,propCellTypesLearned)
+    return(clustInfo)
+  }
+  
+  clust_list <- lapply(unique(markers$Cluster),getClustEnrichForRodriguez,signature=signaturesRodriguez,seurat =seurat,markers =markers) ##markers arg forgotten in testHyper sig
+
+  names(clust_list) <- paste("cluster_",unique(markers$Cluster),sep="")
+  
+  clust_table <- as.data.frame(matrix(unlist(clust_list), nrow=length(unlist(clust_list[1]))))
+  colnames(clust_table) <- names(clust_list)
+  rownames(clust_table) <- names(clust_list[[1]])
+  
+  clust_df <- as.data.frame(t(clust_table))
+  
+  write.csv(clust_df,file = paste(opt$outdir,"/clustInfoRodriguez.csv",sep =""),quote = F)
+  
+}
+
+
+#Clusters table summary
+
+clust_table <- data.frame()
+
+print("Creating cluster table summary")
+
+
+getClustInfo <- function(clust,signatures,seurat,markers) {
+  
+  clustInfo <- list()
+  clustInfo$num_cells <- dim(seurat@meta.data[which(seurat@active.ident==clust),])[1]
+  clustInfo$percent_cells <- clustInfo$num_cells/dim(seurat@meta.data)[1]
+  percentPhases <- table(seurat@meta.data[which(seurat@active.ident==clust),"phases"])/length(seurat@meta.data[which(seurat@active.ident==clust),"phases"]) #In fact this is fraction not percentage
+  
+  if(!is.null(seurat@meta.data$predicted)) {
+    percentPredicted <- table(seurat@meta.data[which(seurat@active.ident==clust),"predicted"])/length(seurat@meta.data[which(seurat@active.ident==clust),"predicted"]) #In fact this is fraction not percentage
+    
+    for (p in unique(seurat@meta.data$predicted)) {
+      print(p)
+      if (is.element(p,names(percentPhases))) {
+        clustInfo[[p]] <- percentPhases[p] 
+      } else {
+        clustInfo[[p]] <- 0
+      }
+    }
+    
+  }
+  
+  for (p in c("G1_G0","S","G2_M")) {
+    if (is.element(p,names(percentPhases))) {
+      clustInfo[[p]] <- percentPhases[p] 
+    } else {
+      clustInfo[[p]] <- 0
+    }
+  }
+  
+  
+  clustInfo$median_genes_expressed <- median(seurat@meta.data[which(seurat@active.ident==clust),"numGenesPerCells"])
+  clustInfo$median_nUMI <- median(seurat@meta.data[which(seurat@active.ident==clust),"Total_mRNAs"])
+  clustInfo$median_percentMitochGenes <- median(seurat@meta.data[which(seurat@active.ident==clust),"percentMito"])
+  
+  
+  clustSig <- lapply(signatures,testHyperSig3,seurat,markers,clust) 
+  
+  clustInfo <- c(clustInfo,clustSig)
+  
+}
+
+allSignatures <- c(signatures,signaturesRodriguez)
+
+clust_list <- lapply(levels(unique(seurat@active.ident)),getClustInfo,allSignatures,seurat,markers)
+
+names(clust_list) <- paste("cluster_",levels(unique(seurat@active.ident)),sep="")
+
+saveRDS(clust_list,paste(opt$outdir,"/clust_list_save.rds",sep =""))
+
+clust_table <- as.data.frame(matrix(unlist(clust_list), nrow=length(unlist(clust_list[1]))))
+colnames(clust_table) <- names(clust_list)
+rownames(clust_table) <- names(clust_list[[1]])
+
+
+clust_df <- as.data.frame(t(clust_table))
+
+write.table(x = clust_df,file = paste(opt$outdir,"/clusters_table.tsv",sep =""),sep="\t",quote=F,col.names = NA)
+
+saveRDS(seurat,file = paste(opt$outdir,"/seurat.rds",sep = ""))
+
+
+
diff --git a/R_src/Seurat3_integration.R b/R_src/Seurat3_integration.R
new file mode 100644
index 0000000000000000000000000000000000000000..444b07cb9f3b12ee886056fdeaf3291b3ff1c4aa
--- /dev/null
+++ b/R_src/Seurat3_integration.R
@@ -0,0 +1,336 @@
+##------------------------------------------------------------------------------
+## L. Herault
+##------------------------------------------------------------------------------
+
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+
+suppressMessages(library(monocle))
+suppressMessages(library(Seurat))
+suppressMessages(library(BiocParallel))
+suppressMessages(library(getopt))
+suppressMessages(library(gridExtra))
+suppressMessages(library(gProfileR))
+suppressMessages(library(RColorBrewer))
+suppressMessages(library(readxl))
+suppressMessages(library(stringr))
+suppressMessages(library(scales))
+suppressMessages(library(cowplot))
+suppressMessages(library(sctransform))
+suppressMessages(library(stringr))
+suppressMessages(library(plyr))
+suppressMessages(library(grid))
+
+
+source("R_src/getSigPerCells.R")
+source("R_src/Enrichment.R")
+source("R_src/funForSeurat.R")
+
+
+# Seurat 3 integration workflow
+
+## -----------------------------------------------------------------------------
+## Command line args
+## -----------------------------------------------------------------------------
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'inputFiles',  'i', 1, "character", "REQUIRED: 10X dataset paths prepared as hspc.combined object (.RDS generated by prepare_data.R) separated by +",
+  'signaturesFile', 's',1, "character", "REQUIRED: signatures rds file",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)',
+  "num_dim_CCA", 'q',1,"numeric", "First n dimensions of CCA to use for FindIntegrationAnchors Seurat function (15 by default)",
+  "num_dim_weight", 'w',1,"numeric", "First n dimensions to use for IntegrateData Seurat function (15 by default)",
+  "num_dim",      'n',1,"numeric", "First n dimensions of PCA to use for clustering, UMAP and TSNE (15 by default)",
+  "num_dim_integrated",'N',1,"numeric", "Number of PCA dimension computed to analyse integrated data (40 by default)",
+  "cores",        'c',1, "numeric", "Number of cores to use for ordering (for differencially expressed gene between clusters test)",
+  "resolution",    'r',1, "numeric", "resolution for hspc.combined clustering",
+  "correction",      "b",1,"character", "Covariable to use as blocking factor (eg one or several columns of pData: betch, cell cycle phases... separated by +)",
+  "logfc_threshold", "l", 1, "numeric", "logfc threshold for finding cluster markers (1 cluster vs all deg) 0.25 by default",
+  "rodriguezSig", "k", 1, "character", "path for xls file of Rodriguez result",
+  "norm_method", "z",1, "character", "normalization method, logNorm (by default) or sctransform",
+  "reusePca", "p", 0, "character","re use pca calculated before when caculating anchor weights for each dataset default to FALSE (permit to correct for cell cycle before integration)"
+), byrow=TRUE, ncol=5);
+
+opt = getopt(spec)
+
+# if help was asked, print a friendly message
+# and exit with a non-zero error code
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputFiles)) {
+  cat("Perform Seurat 3 integration workflow, then cluster the cell with seurat 3 at different resolutions")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+#set default arguments values
+
+if (is.null(opt$resolution)) {
+  opt$resolution <- 0.6
+}
+
+if (is.null(opt$logfc_threshold)) {
+  opt$logfc_threshold <- 0.25
+}
+
+if (is.null(opt$num_dim_CCA)) {
+  opt$num_dim_CCA <- 15
+}
+
+if (is.null(opt$num_dim_weight)) {
+  opt$num_dim_weight <- 15
+}
+
+if (is.null(opt$num_dim)) {
+  opt$num_dim <- 15
+}
+
+if (is.null(opt$num_dim_integrated)) {
+  opt$num_dim_integrated <- 40
+}
+
+if (is.null(opt$norm_method)) {
+  opt$norm_method <- "logNorm"
+}
+
+if (is.null(opt$reusePca)) {
+  opt$reusePca <- FALSE
+}
+
+blank <- grid.rect(gp=gpar(col="white"))
+
+print(opt$reusePca)
+
+dir.create(opt$outdir,recursive = T,showWarnings = F)
+
+
+corrections <- strsplit(x = opt$correction,split = "\\+")[[1]]
+
+
+dir.create(opt$outdir,recursive = T,showWarnings = F)
+
+
+hspc.listFile <- strsplit(opt$inputFiles,split = "\\+")[[1]]
+
+hspc.list <- list()
+#load dataset 
+
+for (i in 1:length(x = hspc.listFile)) {
+  #For testing, in final workflow sampleName will be incorporated in metadata with the loading of cell ranger matrix
+  sampleName <- strsplit(hspc.listFile[i],split = "/")[[1]][2]
+  hspc.list[[i]] <- readRDS(hspc.listFile[i])
+  hspc.list[[i]]@meta.data$sampleName <- sampleName
+  hspc.list[[i]] <- RenameCells(hspc.list[[i]],add.cell.id = sampleName)
+  
+  if(opt$norm_method != "sctransform") {
+    hspc.list[[i]] <- NormalizeData(object = hspc.list[[i]], verbose = FALSE)
+    hspc.list[[i]] <- FindVariableFeatures(object = hspc.list[[i]], selection.method = "vst", 
+                                           nfeatures = 2000, verbose = FALSE)
+  } else {
+    hspc.list[[i]] <- SCTransform(hspc.list[[i]],vars.to.regress = corrections,verbose = T)
+  }
+}
+
+
+hspc.anchors <- FindIntegrationAnchors(object.list = hspc.list, dims = 1:opt$num_dim_CCA)
+
+if(opt$reusePca) {
+  print("Re use pca")
+  hspc.combined <- IntegrateData(anchorset = hspc.anchors, weight.reduction = "pca", dims = 1:opt$num_dim_weight)
+}
+
+hspc.combined <- IntegrateData(anchorset = hspc.anchors, dims = 1:opt$num_dim_weight)
+
+DefaultAssay(object = hspc.combined) <- "integrated"
+
+# Run the standard workflow for visualization and clustering
+#Here certainly need to reuse SCTransform on combined data if norm_method = sct
+
+hspc.combined <- ScaleData(object = hspc.combined, verbose = T,vars.to.regress = corrections)
+hspc.combined <- RunPCA(object = hspc.combined, npcs = opt$num_dim_integrated, verbose = FALSE)
+
+
+png(paste(opt$outdir,"/ElbowPlot.png",sep =""))
+ElbowPlot(object = hspc.combined,ndims = opt$num_dim_integrated)
+dev.off()
+
+
+# t-SNE UMAP and Clustering
+hspc.combined <- RunUMAP(object = hspc.combined, reduction = "pca", dims = 1:opt$num_dim)
+hspc.combined <- RunTSNE(object = hspc.combined, reduction = "pca", dims = 1:opt$num_dim)
+hspc.combined <- FindNeighbors(object = hspc.combined, reduction = "pca", dims = 1:opt$num_dim)
+hspc.combined <- FindClusters(hspc.combined, resolution = c(0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.2))
+
+colPrefix <- "integrated_snn_res."
+
+Idents(hspc.combined) <- paste(colPrefix,opt$resolution,sep = "")
+
+hspc.combined@meta.data$AGE <- "Old"
+hspc.combined@meta.data$AGE[which(hspc.combined@meta.data$age == "2_months")] <- "Young"
+
+
+
+# Visualization of the samples
+p1 <- DimPlot(object = hspc.combined, reduction = "umap", group.by = "sampleName")
+p2 <- DimPlot(object = hspc.combined, reduction = "umap", label = TRUE)
+p3 <- DimPlot(object = hspc.combined, reduction = "umap", group.by = "AGE")
+p4 <- DimPlot(object = hspc.combined, reduction = "umap", group.by = "runDate")
+p3tsne <- DimPlot(object = hspc.combined, reduction = "tsne", group.by = "AGE")
+p4tsne <- DimPlot(object = hspc.combined, reduction = "tsne", group.by = "runDate")
+
+
+
+png(paste(opt$outdir,"/umap_samples.png",sep =""),height = 800,width=1200)
+grid.arrange(p1, p2,ncol = 2)
+dev.off()
+
+png(paste(opt$outdir,"/AGE.png",sep =""),height = 800,width=1200)
+grid.arrange(p3, p3tsne,ncol = 2)
+dev.off()
+
+png(paste(opt$outdir,"/runDate.png",sep =""),height = 800,width=1200)
+grid.arrange(p4, p4tsne,ncol = 2)
+dev.off()
+
+png(paste(opt$outdir,"/UmapClusters.png",sep =""),height = 800,width=1200)
+DimPlot(object = hspc.combined, reduction = "umap", label = TRUE)
+dev.off()
+
+
+p1tsne <- DimPlot(object = hspc.combined, reduction = "tsne", group.by = "sampleName")
+p2tsne <- DimPlot(object = hspc.combined, reduction = "tsne", label = TRUE)
+
+png(paste(opt$outdir,"/tsne_samples.png",sep =""),height = 800,width=1000)
+grid.arrange(p1tsne, p2tsne,ncol = 2)
+dev.off()
+
+#Visualistaoin one sample at a time
+sampleNames <- unique(hspc.combined@meta.data$sampleName) 
+dir.create(paste(opt$outdir,"/samples/umap/",sep = ""),recursive = T)
+dir.create(paste(opt$outdir,"/samples/tsne/",sep = ""),recursive = T)
+
+#samplePlotList <- list()
+  for (s in sampleNames) {
+    png(paste(opt$outdir,"/samples/umap/",s,".png",sep = ""))
+    plot(DimPlot(object = hspc.combined, cells= grep(s, hspc.combined@meta.data$sampleName), reduction = "umap") + ggtitle(s))
+    dev.off()
+    png(paste(opt$outdir,"/samples/tsne/",s,".png",sep = ""))
+    plot(DimPlot(object = hspc.combined, cells= grep(s, hspc.combined@meta.data$sampleName), reduction = "tsne") + ggtitle(s))
+    dev.off()
+  }
+
+
+umapListRes <- list()
+tsneListRes <- list()
+for (r in c(0.6,0.8,1,1.2)) {
+  umapListRes[[as.character(r)]] <- DimPlot(hspc.combined,
+                                            reduction = "umap",
+                                            label = T,
+                                            group.by = paste(colPrefix,r,sep="")) + 
+    NoLegend() +
+    ggtitle(paste("res",r))
+  
+  tsneListRes[[as.character(r)]] <- DimPlot(hspc.combined,
+                                            reduction = "tsne",
+                                            label = T,
+                                            group.by = paste(colPrefix,r,sep="")) + 
+    NoLegend() +
+    ggtitle(paste("res",r))
+}
+
+png(paste(opt$outdir,"/tsne_different_res.png",sep = ""),height = 800,width = 800)
+grid.arrange(tsneListRes[[1]],tsneListRes[[2]],tsneListRes[[3]],tsneListRes[[4]])
+dev.off()
+
+png(paste(opt$outdir,"/umap_different_res.png",sep = ""),height = 800,width = 800)
+grid.arrange(umapListRes[[1]],umapListRes[[2]],umapListRes[[3]],umapListRes[[4]])
+dev.off()
+
+
+
+hspc.combined@meta.data$numclust <- hspc.combined@meta.data[,paste(colPrefix,opt$resolution,sep = "")]
+
+
+#Check for unwanted source of variation UMAP
+pPhases <- DimPlot(hspc.combined,group.by = "phases")
+if (!is.null(hspc.combined@meta.data$predicted)) {
+  pPred <- DimPlot(hspc.combined,group.by = "predicted")
+} else {
+  pPred <- blank
+}
+pUMI <- FeaturePlot(hspc.combined, "Total_mRNAs")
+pMito <- FeaturePlot(hspc.combined, "percentMito")
+
+png(paste(opt$outdir,"/umap_factors.png",sep = ""),height = 800,width = 800)
+grid.arrange(pPhases,pPred,pUMI,pMito)
+dev.off()
+
+#Check for unwanted source of variation tSNE
+pPhases <- DimPlot(hspc.combined,group.by = "phases",reduction = "tsne")
+if (!is.null(hspc.combined@meta.data$predicted)) {
+  pPred <- DimPlot(hspc.combined,group.by = "predicted",reduction = "tsne")
+} else {
+  pPred <- blank
+}
+pUMI <- FeaturePlot(hspc.combined, "Total_mRNAs",reduction = "tsne")
+pMito <- FeaturePlot(hspc.combined, "percentMito",reduction = "tsne")
+
+png(paste(opt$outdir,"/tsne_factors.png",sep = ""),height = 800,width = 800)
+grid.arrange(pPhases,pPred,pUMI,pMito)
+dev.off()
+
+
+#Check for genes
+
+gene_list<- c("Pdzk1ip1","Mllt3",
+              "Ctla2a","Cd27","Cd34",
+              "Lig1","Hells","Tyms",
+              "Notch2","Lst1",
+              "Irf7","Stat1",
+              "Pf4","Itga2b",
+              "Klf1","Gata1",
+              "Mpo","Cd48",
+              "Fcer1a","Hdc",
+              "Il7r","Thy1", # Ccr9 not found
+              "Cdc20","Ccnb1","Racgap1",
+              "Mzb1","Ly6d", "Trp53inp1",
+              "Jun","Fos","Nr4a1","Jund")
+
+gene_list <- gene_list[which(is.element(set=rownames(hspc.combined),el = gene_list))] 
+
+dir.create(paste(opt$outdir,"/genes/umap/",sep =""),recursive = T)
+dir.create(paste(opt$outdir,"/genes/tsne/",sep =""),recursive = T)
+for (g in gene_list) {
+  png(paste(opt$outdir,"/genes/umap/",g,".png",sep = ""))
+  plot(FeaturePlot(hspc.combined,features = g))
+  dev.off()
+}
+
+for (g in gene_list) {
+  png(paste(opt$outdir,"/genes/tsne/",g,".png",sep = ""))
+  plot(FeaturePlot(hspc.combined,features = g,reduction = "tsne"))
+  dev.off()
+}
+
+#AGE prop for each cluster 
+age_clust <- getAGEPropPerClustBarplot(hspc.combined)
+#sample prop for each cluster
+sample_clust <- getSamplePropPerClustBarplot(hspc.combined)
+#runDate prop for each cluster
+runDate_clust <- getRunDatePropPerClustBarplot(hspc.combined)
+
+png(paste(opt$outdir,"/propPerClust.png",sep = ""))
+grid.arrange(age_clust,sample_clust,runDate_clust,p2tsne)
+dev.off()
+
+
+## saving results
+
+saveRDS(hspc.combined,paste(opt$outdir,"/combined.rds",sep =""))
+
+
+
+
+
+
diff --git a/R_src/Seurat3_integration_biology.R b/R_src/Seurat3_integration_biology.R
new file mode 100644
index 0000000000000000000000000000000000000000..327783be28510d2a40e6022a77d3c552a9c16658
--- /dev/null
+++ b/R_src/Seurat3_integration_biology.R
@@ -0,0 +1,219 @@
+##------------------------------------------------------------------------------
+## L. Herault
+##------------------------------------------------------------------------------
+
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+
+suppressMessages(library(monocle))
+suppressMessages(library(Seurat))
+suppressMessages(library(BiocParallel))
+suppressMessages(library(getopt))
+suppressMessages(library(gridExtra))
+suppressMessages(library(gProfileR))
+suppressMessages(library(RColorBrewer))
+suppressMessages(library(readxl))
+suppressMessages(library(stringr))
+suppressMessages(library(plyr))
+suppressMessages(library(biomaRt))
+
+
+
+suppressMessages(library(scales))
+
+
+source("R_src/getSigPerCells.R")
+source("R_src/Enrichment.R")
+source("R_src/funForSeurat.R")
+
+# Analysis of seurat 3 integration and clusternig workflow
+
+## -----------------------------------------------------------------------------
+## Command line args
+## -----------------------------------------------------------------------------
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'inputRDS',  'i', 1, "character", "REQUIRED : 10X data prepared qs seurat object with CCA and first clustering (.RDS generated by prepare_data.R).",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)',
+  "mart",         'm',1, "character", "mart data rds format if biomaRt server is down and you have a save ",
+  "resolution",       'r',1, "numeric", "resolution of the clustering analysed default to 0.8",
+  "num_dim",          'n', 1, "numeric", "first n dimension of the CCA to use for the reclustering",
+  "minPropCellExp", "p",1,"character", "proportion minimal of cells that expressed the genes kept for the analaysis 0.001 by default",
+  "signaturesFile", "s", 1, "character", "path to folder with signature list store as rds object",
+  "logfc_threshold", "l", 1, "numeric", "logfc threshold for finding cluster markers (1 cluster vs all deg) 0.25 by default",
+  "rodriguezSig", "k", 1, "character", "path for xls file of Rodriguez result",
+  "gprofiler", "g", 0, "logical", "Put to avoid gprofiler analysis",
+  "norm_method",    "z",1, "character", "normalisation method, logNorm seurat (by default) or sctransform"
+), byrow=TRUE, ncol=5);
+
+
+
+opt = getopt(spec)
+
+
+# if help was asked, print a friendly message
+# and exit with a non-zero error code
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputRDS)) {
+  cat("Perform diff expression between clusters previously obtained with Seurat3, make a summary table of cluster metrics and signature enrichments")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+
+if (is.null(opt$logfc_threshold)) {
+  opt$logfc_threshold = 0.25
+} 
+if (is.null(opt$norm_method)) {
+  opt$norm_method = "logNorm"
+} 
+
+print(paste("logfc threshold:", opt$logfc_threshold) )
+
+if (is.null(opt$outdir)) {
+  opt$outdir = "./"
+}
+
+
+if(is.null(opt$doMarkers)) {
+  opt$doMarkers = T
+}
+
+if (is.null(opt$gprofiler)) {
+  opt$gprofiler <- T
+} else {
+  opt$gprofiler <- F
+}
+
+
+if (is.null(opt$mart)){
+  mart <- useDataset("mmusculus_gene_ensembl", useMart("ensembl"))
+} else {
+  mart <- readRDS(opt$mart)
+}
+
+
+dir.create(opt$outdir,recursive = T,showWarnings = F)
+
+hspc.combined <- readRDS(opt$inputRDS)
+signatures <- readRDS(opt$signaturesFile)
+
+# Set active idents/slot
+Idents(hspc.combined) <- paste("integrated_snn_res.",opt$resolution,sep ="")
+
+hspc.combined@meta.data$numclust <- Idents(hspc.combined)
+
+DefaultAssay(object = hspc.combined) <- "RNA"
+if (opt$norm_method == "sctransform") {
+  DefaultAssay(object = hspc.combined) <- "SCT"
+}
+
+## Plotting
+
+png(paste(opt$outdir,"/seurat_res",opt$resolution,".png",sep = ""),height = 800,width = 800)
+DimPlot(hspc.combined,label = TRUE,pt.size = 1.5) + ggtitle(paste("seurat_res",opt$resolution,sep = "")) + NoLegend()
+dev.off()
+
+#age <- getAGEPropPerClustBarplot(hspc.combined)
+
+print(paste("integrated_snn_res.",opt$resolution,sep =""))
+print(colnames(hspc.combined@meta.data))
+
+#ageEnrich <- getEnrichAge(hspc.combined,clustCol =paste("integrated_snn_res.",opt$resolution,sep =""),metaCol = "AGE")
+
+# ageEnrich <- as.data.frame(t(ageEnrich))
+# ageEnrich$color <- "black"
+# ageEnrich[which(as.numeric(as.vector(ageEnrich$phyper)) < 0.05),"color"] <- "red"
+# age <- age + theme(axis.text.y = element_text(colour = ageEnrich[,'color']))
+
+
+# png(paste(opt$outdir,"/bp_AGE.png",sep = ""),height = 800,width = 800)
+# plot(age)
+# dev.off()
+
+png(paste(opt$outdir,"/bp_sample.png",sep = ""),height = 800,width = 800)
+getSamplePropPerClustBarplot(hspc.combined) 
+dev.off()
+
+png(paste(opt$outdir,"/bp_runDate.png",sep = ""),height = 800,width = 800)
+getRunDatePropPerClustBarplot(hspc.combined) 
+dev.off()
+
+png(paste(opt$outdir,"/bp_phases.png",sep = ""),height = 800,width = 800)
+getPhasePropPerClustBarplot(hspc.combined) 
+dev.off()
+
+if (!is.null(hspc.combined@meta.data$predicted)) {
+png(paste(opt$outdir,"/bp_predicted.png",sep = ""),height = 800,width = 800)
+getPredictedPropPerClustBarplot(hspc.combined) 
+dev.off()
+}
+
+
+## Perform differential expression analysis between clusters
+
+
+dirMarkers <- paste(opt$outdir,"/markers_res",opt$resolution,sep ="")
+dir.create(dirMarkers)
+
+markers <- FindAllMarkers(hspc.combined,only.pos = T,logfc.threshold= opt$logfc_threshold)
+markers <- markers[which(markers$p_val_adj < 0.05),]
+
+ylab <- "LogNormalized UMI counts"
+if (opt$norm_method == "sctransform") {
+  ylab <- "Expression level"
+}
+
+for (numClust in unique(markers$cluster)) {
+  print(head(markers[which(markers$cluster == numClust),],n=9))
+  png(paste(dirMarkers,"/Cluster_",numClust,"_topGenesVlnPlot.png",sep =""),width = 1000, height = 1000)
+  print(plot(VlnPlot(object = hspc.combined, features = head(markers[which(markers$cluster == numClust),"gene"],n=9),pt.size = 0.5) + 
+               labs(x = "Clusters",y= ylab,colour = "black") +
+               theme(axis.text = element_text(size=20),
+                     plot.title = element_text(size=25)) ))
+  dev.off()
+}
+
+write.table(x = markers,paste(dirMarkers,"/markers.tsv", sep =""),sep = "\t",quote = F,row.names = F,col.names = T)
+
+print("getting TF markers")
+TF_markers <- getBM(attributes=c("external_gene_name"),
+                    filters=c("external_gene_name","go"),
+                    values=list(markers$gene,
+                                "GO:0003700"),mart=mart)
+
+#Add tf from bonzanni plus Zbtb16 and Gfi1b
+TF_bonzani <- data.frame(external_gene_name = c("Spi1","Tal1","Zfpm1","Cbfa2t3","Erg","Fli1","Gata1","Gata2","Hhex","Runx1","Smad6","Zbtb16","Gfi1b"))
+TF_markers <- rbind(TF_markers,TF_bonzani)
+TF_markers <- unique(TF_markers)
+
+write.table(TF_markers,paste(dirMarkers,"/markers_and_bonzanni_TF.tsv",sep =""),
+            sep = "\t",quote = F,row.names = F, col.names = F)
+
+# Modify colnames of markers to use gProfileAnalysis function
+firstup <- function(x) {
+  substr(x, 1, 1) <- toupper(substr(x, 1, 1))
+  x
+}
+
+colnames(markers) <- firstup(colnames(markers))
+
+
+if(opt$gprofiler) {
+  gprofiler_result <- gProfileAnalysis(deg_clust = markers,
+                                       outdir = paste(dirMarkers,"/gProfileR", sep =""),
+                                       background = rownames(hspc.combined),
+                                       colors = hue_pal()(length(unique(markers$Cluster))))
+  
+  saveRDS(gprofiler_result,file=paste(dirMarkers,"/gProfileR/gprofiler_results.rds",sep =""))
+}
+
+getClustTable(opt$rodriguezSig,markers=markers,signatures = signatures,seurat = hspc.combined,outdir = dirMarkers)
+
+
+
+
+
diff --git a/R_src/Seurat4CL.R b/R_src/Seurat4CL.R
new file mode 100644
index 0000000000000000000000000000000000000000..72860cbada218263263d0bb81f1447080e791191
--- /dev/null
+++ b/R_src/Seurat4CL.R
@@ -0,0 +1,490 @@
+##------------------------------------------------------------------------------
+## L. Herault
+##------------------------------------------------------------------------------
+
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+
+suppressMessages(library(Seurat))
+suppressMessages(library(BiocParallel))
+suppressMessages(library(getopt))
+suppressMessages(library(gridExtra))
+#suppressMessages(library(gProfileR))
+suppressMessages(library(RColorBrewer))
+suppressMessages(library(readxl))
+suppressMessages(library(stringr))
+suppressMessages(library(scales))
+suppressMessages(library(grid))
+suppressMessages(library(ggplot2))
+
+
+
+
+source("R_src/getSigPerCells.R")
+source("R_src/Enrichment.R")
+source("R_src/funForSeurat.R")
+
+# Seurat 3 analysis of an individual sample required for using CaSTLe script.
+
+## -----------------------------------------------------------------------------
+## Command line args
+## -----------------------------------------------------------------------------
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'inputRDS',  'i', 1, "character", "REQUIRED : 10X data prepared as monocle or seurat object (.RDS generated by prepare_data.R).",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)',
+  "num_dim",      'n',1,"numeric", "Number of dimension to use for ordering (eg n first pc of PCA on input data) 10 by default",
+  "correction",      "b",1,"character", "Covariable to use as blocking factor (eg one or several columns of pData: betch, cell cycle phases... separated by +)",
+  "minPropCellExp", "p",1,"numeric", "minimal proportion of cells that expressed the genes kept for the analaysis 0.001 by default",
+  "norm_method",    "z",1, "character", "normalisation method, logNorm seurat (by default) or sctransform",
+  "resolution", "r", 1,"numeric", "resolution for Seurat clustering 0.9 by default",
+  "signaturesFile", "s", 1, "character", "path to folder with signature list store as rds object",
+  "identRemoved",    "d", 1, "character", "Optionnal cluster to remove only work if input is a seurat object",
+  "nonExpressedGenesRemoved", "e", 0,"logical", "non expressed gene already removed default to FALSE",
+  "gprofiler", "g", 0, "logical", "if true doing gprofiler default to true",
+  "logfc_threshold", "l", 1, "numeric", "logfc threshold for finding cluster markers (1 cluster vs all deg) 0.25 by default"
+  
+), byrow=TRUE, ncol=5);
+
+opt = getopt(spec)
+
+## For testing
+# 
+# opt <- list()
+# opt$outdir <- "output/testSeurat4/"
+# opt$correction <- "G2M_score+S_score+G1_score"
+# opt$inputRDS <- "seurat_treated.rds"
+# opt$signatureFile <- "output/signatures/publicSignatures.rds"
+
+
+
+# if help was asked, print a friendly message
+# and exit with a non-zero error code
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputRDS)) {
+  cat("For an individual sample, perform PCA, tSNE, UMAP, Louvain clustering, diff expression between clusters with Seurat3 package with filtered poor quality cells data (gbm_cds monocle object)")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+
+#set default arguments values
+
+if (is.null(opt$num_dim)) {
+  opt$num_dim <- 10
+}
+
+if (is.null(opt$resolution)) {
+  opt$resolution <- 0.8
+}
+
+if (is.null(opt$minPropCellExp)) {
+  opt$minPropCellExp <- 0.001
+  print(opt$minPropCellExp)
+}
+
+
+if (is.null(opt$logfc_threshold)) {
+  opt$logfc_threshold = 0.25
+} 
+
+print(paste("logfc threshold:", opt$logfc_threshold) )
+
+if (is.null(opt$outdir)) {
+  opt$outdir = "./"
+}
+
+if (is.null(opt$norm_method)) {
+  opt$norm_method = "logNorm"
+}
+
+
+
+if (is.null(opt$nonExpressedGenesRemoved)) {
+  opt$nonExpressedGenesRemoved = F
+} else {
+  opt$nonExpressedGenesRemoved = T
+}
+
+# if (is.null(opt$gprofiler)) {
+#   opt$gprofiler <- T
+# } else {
+#   opt$gprofiler <- F
+# }
+
+
+# get correction vector
+if (!is.null(opt$correction)) {
+  corrections <- strsplit(x = opt$correction,split = "\\+")[[1]]
+} else {
+  corrections <- NULL
+}
+
+
+
+dir.create(opt$outdir,recursive = T,showWarnings = F)
+
+print(opt)
+
+# Loading
+seurat <- readRDS(opt$inputRDS)
+
+# Remove non expressed genes if not done
+if(opt$nonExpressedGenesRemoved == F) {
+  genesFiltered <- list(genes_before = dim(seurat)[1])
+  
+  print("remove non expressed genes (non expressed in at least X% of the cells X user option in monocle dp feature 5% in seurat tutorial 0,1%)")
+  threshold <- opt$minPropCellExp * ncol(seurat)
+
+  seurat <- CreateSeuratObject(counts = as.matrix(GetAssayData(seurat,slot = 'counts',assay = "RNA")),
+                               assay = "RNA",
+                               meta.data = seurat@meta.data,
+                               min.cells = threshold)
+  genesFiltered$genes_after <- dim(seurat)[1]
+  
+  png(paste(opt$outdir,"/genesFiltering.png",sep = ""))
+  barplot(unlist(genesFiltered),main = "Genes filtering")
+  dev.off()
+  
+}
+
+
+
+#################################################################################################
+####################################### Seurat Workflow #########################################
+#################################################################################################
+
+
+classicSeuratWorkflow <- function(seurat, corrections,outdir) {
+  dir.create(outdir,recursive = T,showWarnings = F)
+  
+  if (opt$norm_method == "sctransform") {
+    
+    seurat <- SCTransform(object = seurat, vars.to.regress = corrections)
+    
+  } else {
+    
+    seurat <- NormalizeData(object = seurat)
+    seurat <- FindVariableFeatures(object = seurat,selection.method = "vst", nfeatures = 2000, verbose = T)
+    seurat <- ScaleData(object = seurat,vars.to.regress = corrections)
+    
+  }
+  
+  seurat <- RunPCA(object = seurat)
+  
+  
+  png(paste(outdir,"/ElbowPlot.png",sep =""))
+  ElbowPlot(object = seurat,ndims = 30)
+  dev.off()
+  
+  
+  print("Clustering...")
+  
+  seurat <- FindNeighbors(object = seurat,dims = c(1:opt$num_dim),k.param = 20)
+  seurat <- FindClusters(object = seurat,resolution = c(0.5,0.6,0.7,0.8,0.9,1,1.2))
+  
+  print("Running TSNE...")
+  
+  seurat <- RunTSNE(seurat,dims = c(1:opt$num_dim))
+  
+  print("Running UMAP...")
+  
+  seurat <- RunUMAP(seurat,dims = c(1:opt$num_dim))
+  
+  print("UMAP ok")
+  print(colnames(seurat@meta.data))
+  
+  colPrefix <- "RNA_snn_res."
+  if(opt$norm_method == "sctransform") {
+    colPrefix <- "SCT_snn_res."
+  }
+  
+  umapListRes <- list()
+  tsneListRes <- list()
+  for (r in c(0.6,0.8,1,1.2)) {
+    umapListRes[[as.character(r)]] <- DimPlot(seurat,
+                                              reduction = "umap",
+                                              label = T,
+                                              group.by = paste(colPrefix,r,sep="")) + 
+      NoLegend() +
+      ggplot2::ggtitle(paste("res",r))
+    
+    tsneListRes[[as.character(r)]] <- DimPlot(seurat,
+                                              reduction = "tsne",
+                                              label = T,
+                                              group.by = paste(colPrefix,r,sep="")) + 
+      NoLegend() +
+      ggplot2::ggtitle(paste("res",r))
+  }
+  
+  png(paste(outdir,"/tsne_different_res.png",sep = ""),height = 800,width = 800)
+  grid.arrange(tsneListRes[[1]],tsneListRes[[2]],tsneListRes[[3]],tsneListRes[[4]])
+  dev.off()
+  
+  png(paste(outdir,"/umap_different_res.png",sep = ""),height = 800,width = 800)
+  grid.arrange(umapListRes[[1]],umapListRes[[2]],umapListRes[[3]],umapListRes[[4]])
+  dev.off()
+  
+  
+  
+  
+  Idents(seurat) <- paste(colPrefix,opt$resolution,sep = "")
+  seurat@meta.data$numclust <- seurat@meta.data[,paste(colPrefix,opt$resolution,sep = "")]
+  
+  
+  #Check for unwanted source of variation
+  pPhases <- DimPlot(seurat,group.by = "phases")
+  if (!is.null(seurat@meta.data$predicted)) {
+    pPred <- DimPlot(seurat,group.by = "predicted")
+  } else {
+    pPred <- DimPlot(seurat)
+  }
+  
+  pUMI <- FeaturePlot(seurat, "nCount_RNA")
+  pMito <- FeaturePlot(seurat, "percentMito")
+  
+  png(paste(outdir,"/umap_factors.png",sep = ""),height = 800,width = 800)
+  grid.arrange(pPhases,pPred,pUMI,pMito)
+  dev.off()
+  
+  #Check for unwanted source of variation
+  pPhases <- DimPlot(seurat,group.by = "phases",reduction = "tsne")
+  if (!is.null(seurat@meta.data$predicted)) {
+    pPred <- DimPlot(seurat,group.by = "predicted",reduction = "tsne")
+  } else {
+    pPred <- DimPlot(seurat,reduction = "tsne")
+  }
+  pUMI <- FeaturePlot(seurat, "nCount_RNA",reduction = "tsne")
+  pMito <- FeaturePlot(seurat, "percentMito",reduction = "tsne")
+  
+  png(paste(outdir,"/tsne_factors.png",sep = ""),height = 800,width = 800)
+  grid.arrange(pPhases,pPred,pUMI,pMito)
+  dev.off()
+  
+  return(seurat)
+  
+}
+
+
+## First without integration without correction
+unintegrated <- seurat
+DefaultAssay(unintegrated) <- "RNA"
+classicSeuratWorkflow(unintegrated,correction = NULL,outdir = paste0(opt$outdir,"/SeuratWoIntegrationWoCr/"))
+
+## without integration without correction
+
+classicSeuratWorkflow(unintegrated,correction = corrections,outdir = paste0(opt$outdir,"/SeuratWoIntegrationWithCr/"))
+
+
+## With integration without correction
+if (!is.null(corrections)) {
+  print("Seurat workflow without correction")
+  classicSeuratWorkflow(seurat,correction = NULL,outdir = paste0(opt$outdir,"/SeuratWithoutCr/"))
+}
+
+## With integration with correction
+
+seurat <- classicSeuratWorkflow(seurat,corrections = corrections,outdir =opt$outdir)
+
+#################################################################################################
+####################################### Markers analysis ########################################
+#################################################################################################
+
+markers <- FindAllMarkers(seurat,only.pos = T,logfc.threshold= opt$logfc_threshold)
+markers <- markers[which(markers$p_val_adj < 0.05),]
+
+write.table(x = markers,paste(opt$outdir,"/markers.tsv", sep =""),sep = "\t",quote = F,row.names = F,col.names = T)
+
+
+dir.create(paste(opt$outdir,"/markers/",sep = ""))
+
+ylab <- "LogNormalized UMI counts"
+if (opt$norm_method == "sctransform") {
+  ylab <- "Expression level"
+}
+
+for (numClust in unique(markers$cluster)) {
+  print(head(markers[which(markers$cluster == numClust),],n=9))
+  png(paste(opt$outdir,"/markers/Cluster_",numClust,"_topGenesVlnPlot.png",sep =""),width = 1000, height = 1000)
+  plot(VlnPlot(object = seurat, features = head(markers[which(markers$cluster == numClust),"gene"],n=9),pt.size = 0.5) + 
+         labs(x = "Clusters",y= ylab,colour = "black") +
+         theme(axis.text = element_text(size=20),
+               plot.title = element_text(size=25)) )
+  dev.off()
+  
+}
+
+
+
+## Add signatures scores
+dir.create(paste(opt$outdir,"/cellSignatures/",sep = ""))
+signatures <- readRDS(opt$signaturesFile)
+names(signatures) <- paste0(names(signatures),"_",seurat$sampleName[1])
+
+for (sig in c(1:length(signatures))) {
+  sigName <- names(signatures)[sig]
+  signature <- signatures[[sig]]
+  seurat <- scoreCells3(seurat,signature,outdir= paste(opt$outdir,"/cellSignatures/",sep=""),sigName)
+}
+
+#################################################################################################
+##########                                Enrichment                                   ##########
+#################################################################################################
+
+
+# ## Enrichment
+# 
+# # Modify colnames of markers to use gProfileAnalysis function
+# firstup <- function(x) {
+#   substr(x, 1, 1) <- toupper(substr(x, 1, 1))
+#   x
+# }
+# 
+# colnames(markers) <- firstup(colnames(markers))
+# 
+# 
+# #be careful background
+# if(opt$gprofiler) {
+# gprofiler_result <- gProfileAnalysis(deg_clust = markers,
+#                                      outdir = paste(opt$outdir,"/gProfileR", sep =""),
+#                                      background = rownames(seurat),
+#                                      colors = hue_pal()(length(unique(markers$Cluster))))
+# 
+# saveRDS(gprofiler_result,file=paste(opt$outdir,"/gProfileR/gprofiler_results.rds",sep =""))
+# }
+# 
+# 
+# 
+# # Test Rodriguez cluster sig
+# if(!is.null(opt$rodriguezSig)) {
+#   
+#   clusterNames <- c("C1","C2","C3","Mk","Er","Ba","Neu","Mo1","Mo2", "preDC","preB","preT")
+#   
+#   RodriguezClustersSig <- lapply(X= c(1:length(clusterNames)),FUN = read_xlsx,path = opt$rodriguezSig)
+#   
+#   names(RodriguezClustersSig) <- clusterNames 
+#   
+#   getOnlyPos <- function(clustersSig) {
+#     clusterSig <- clustersSig[which(clustersSig$log.effect > 0),]
+#     return(clusterSig)
+#   }
+#   
+#   RodriguezClustersSigPos <- lapply(X= RodriguezClustersSig, getOnlyPos)
+#   
+#   
+#   signaturesRodriguez <- lapply(RodriguezClustersSigPos,"[[",1 )
+#   
+#   
+#   firstup <- function(x) {
+#     substr(x, 1, 1) <- toupper(substr(x, 1, 1))
+#     x
+#   }
+#   
+#   colnames(markers) <- firstup(colnames(markers))
+#   
+#   getClustEnrichForRodriguez <- function(clust,signatures,seurat,markers) {
+#     clustSig <- lapply(signatures,testHyperSig3,seurat,markers,clust)
+#     if (!is.null(seurat@meta.data$predicted)) {
+#     propCellTypesLearned <- table(seurat@meta.data[which(seurat@meta.data$numclust==clust),"predicted"])/length(seurat@meta.data[which(seurat@meta.data$numclust==clust),"predicted"])
+#     } else{
+#       propCellTypesLearned <- NULL
+#     }
+#     clustInfo <- c(clustSig,propCellTypesLearned)
+#     return(clustInfo)
+#   }
+#   
+#   clust_list <- lapply(unique(markers$Cluster),getClustEnrichForRodriguez,signature=signaturesRodriguez,seurat =seurat,markers =markers) ##markers arg forgotten in testHyper sig
+# 
+#   names(clust_list) <- paste("cluster_",unique(markers$Cluster),sep="")
+#   
+#   clust_table <- as.data.frame(matrix(unlist(clust_list), nrow=length(unlist(clust_list[1]))))
+#   colnames(clust_table) <- names(clust_list)
+#   rownames(clust_table) <- names(clust_list[[1]])
+#   
+#   clust_df <- as.data.frame(t(clust_table))
+#   
+#   write.csv(clust_df,file = paste(opt$outdir,"/clustInfoRodriguez.csv",sep =""),quote = F)
+#   
+# }
+
+
+#################################################################################################
+##########                     Cluster summary table                                    #########
+#################################################################################################
+
+clust_table <- data.frame()
+
+print("Creating cluster summary table ")
+
+  firstup <- function(x) {
+    substr(x, 1, 1) <- toupper(substr(x, 1, 1))
+    x
+  }
+
+  colnames(markers) <- firstup(colnames(markers))
+
+getClustInfo <- function(clust,signatures,seurat,markers) {
+  
+  clustInfo <- list()
+  clustInfo$num_cells <- dim(seurat@meta.data[which(seurat@active.ident==clust),])[1]
+  clustInfo$percent_cells <- clustInfo$num_cells/dim(seurat@meta.data)[1]
+  percentPhases <- table(seurat@meta.data[which(seurat@active.ident==clust),"phases"])/length(seurat@meta.data[which(seurat@active.ident==clust),"phases"]) #In fact this is fraction not percentage
+  
+  if(!is.null(seurat@meta.data$predicted)) {
+    percentPredicted <- table(seurat@meta.data[which(seurat@active.ident==clust),"predicted"])/length(seurat@meta.data[which(seurat@active.ident==clust),"predicted"]) #In fact this is fraction not percentage
+    
+    for (p in unique(seurat@meta.data$predicted)) {
+      print(p)
+      if (is.element(p,names(percentPhases))) {
+        clustInfo[[p]] <- percentPhases[p] 
+      } else {
+        clustInfo[[p]] <- 0
+      }
+    }
+    
+  }
+  
+  for (p in c("G1_G0","S","G2_M")) {
+    if (is.element(p,names(percentPhases))) {
+      clustInfo[[p]] <- percentPhases[p] 
+    } else {
+      clustInfo[[p]] <- 0
+    }
+  }
+  
+  
+  clustInfo$median_genes_expressed <- median(seurat@meta.data[which(seurat@active.ident==clust),"nFeature_RNA"])
+  clustInfo$median_nUMI <- median(seurat@meta.data[which(seurat@active.ident==clust),"nCount_RNA"])
+  clustInfo$median_percentMitochGenes <- median(seurat@meta.data[which(seurat@active.ident==clust),"percentMito"])
+  
+  
+  clustSig <- lapply(signatures,testHyperSig3,seurat,markers,clust) 
+  
+  clustInfo <- c(clustInfo,clustSig)
+  
+}
+
+#allSignatures <- c(signatures,signaturesRodriguez)
+
+clust_list <- lapply(levels(unique(seurat@active.ident)),getClustInfo,signatures,seurat,markers)
+
+names(clust_list) <- paste("cluster_",levels(unique(seurat@active.ident)),sep="")
+
+saveRDS(clust_list,paste(opt$outdir,"/clust_list_save.rds",sep =""))
+
+clust_table <- as.data.frame(matrix(unlist(clust_list), nrow=length(unlist(clust_list[1]))))
+colnames(clust_table) <- names(clust_list)
+rownames(clust_table) <- names(clust_list[[1]])
+
+
+clust_df <- as.data.frame(t(clust_table))
+
+write.table(x = clust_df,file = paste(opt$outdir,"/clusters_table.tsv",sep =""),sep="\t",quote=F,col.names = NA)
+
+saveRDS(seurat,file = paste(opt$outdir,"/seurat.rds",sep = ""))
+
+
+
diff --git a/R_src/Seurat4_integration.R b/R_src/Seurat4_integration.R
new file mode 100644
index 0000000000000000000000000000000000000000..3b679bac5b62a0600afa126135bc236a19bada4c
--- /dev/null
+++ b/R_src/Seurat4_integration.R
@@ -0,0 +1,335 @@
+##------------------------------------------------------------------------------
+## L. Herault
+##------------------------------------------------------------------------------
+
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+
+#suppressMessages(library(monocle))
+suppressMessages(library(Seurat))
+suppressMessages(library(BiocParallel))
+suppressMessages(library(getopt))
+suppressMessages(library(gridExtra))
+#suppressMessages(library(gProfileR))
+suppressMessages(library(RColorBrewer))
+suppressMessages(library(readxl))
+suppressMessages(library(stringr))
+suppressMessages(library(scales))
+suppressMessages(library(cowplot))
+suppressMessages(library(sctransform))
+suppressMessages(library(stringr))
+suppressMessages(library(plyr))
+suppressMessages(library(grid))
+suppressMessages(library(ggplot2))
+
+
+
+source("R_src/getSigPerCells.R")
+source("R_src/Enrichment.R")
+source("R_src/funForSeurat.R")
+
+
+# Seurat 3 integration workflow
+
+## -----------------------------------------------------------------------------
+## Command line args
+## -----------------------------------------------------------------------------
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'inputFiles',  'i', 1, "character", "REQUIRED: 10X dataset paths prepared as seurat object (.RDS generated by prepare_data.R) separated by +",
+  'signaturesFile', 's',1, "character", "REQUIRED: signatures rds file",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)',
+  "num_dim_CCA", 'q',1,"numeric", "First n dimensions of CCA to use for FindIntegrationAnchors Seurat function (15 by default)",
+  "num_dim_weight", 'w',1,"numeric", "First n dimensions to use for IntegrateData Seurat function (15 by default)",
+  "num_dim",      'n',1,"numeric", "First n dimensions of PCA to use for clustering, UMAP and TSNE (15 by default)",
+  "num_dim_integrated",'N',1,"numeric", "Number of PCA dimension computed to analyse integrated data (40 by default)",
+  "cores",        'c',1, "numeric", "Number of cores to use for ordering (for differencially expressed gene between clusters test)",
+  "resolution",    'r',1, "numeric", "resolution for smp.combined clustering",
+  "correction",      "b",1,"character", "Covariable to use as blocking factor (eg one or several columns of pData: betch, cell cycle phases... separated by +)",
+  "logfc_threshold", "l", 1, "numeric", "logfc threshold for finding cluster markers (1 cluster vs all deg) 0.25 by default",
+  "rodriguezSig", "k", 1, "character", "path for xls file of Rodriguez result",
+  "norm_method", "z",1, "character", "normalization method, logNorm (by default) or sctransform",
+  "reusePca", "p", 0, "character","re use pca calculated before when caculating anchor weights for each dataset default to FALSE (permit to correct for cell cycle before integration)"
+), byrow=TRUE, ncol=5);
+
+opt = getopt(spec)
+
+# if help was asked, print a friendly message
+# and exit with a non-zero error code
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputFiles)) {
+  cat("Perform Seurat 3 integration workflow, then cluster the cell with seurat 3 at different resolutions")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+#set default arguments values
+
+if (is.null(opt$resolution)) {
+  opt$resolution <- 0.6
+}
+
+if (is.null(opt$logfc_threshold)) {
+  opt$logfc_threshold <- 0.25
+}
+
+if (is.null(opt$num_dim_CCA)) {
+  opt$num_dim_CCA <- 15
+}
+
+if (is.null(opt$num_dim_weight)) {
+  opt$num_dim_weight <- 15
+}
+
+if (is.null(opt$num_dim)) {
+  opt$num_dim <- 15
+}
+
+if (is.null(opt$num_dim_integrated)) {
+  opt$num_dim_integrated <- 40
+}
+
+if (is.null(opt$norm_method)) {
+  opt$norm_method <- "logNorm"
+}
+
+if (is.null(opt$reusePca)) {
+  opt$reusePca <- FALSE
+}
+
+print(opt$reusePca)
+
+dir.create(opt$outdir,recursive = T,showWarnings = F)
+
+
+corrections <- strsplit(x = opt$correction,split = "\\+")[[1]]
+
+
+dir.create(opt$outdir,recursive = T,showWarnings = F)
+
+
+smp.listFile <- strsplit(opt$inputFiles,split = "\\+")[[1]]
+
+smp.list <- list()
+#load dataset 
+
+for (i in 1:length(x = smp.listFile)) {
+  #For testing, in final workflow sampleName will be incorporated in metadata with the loading of cell ranger matrix
+  smp.list[[i]] <- readRDS(smp.listFile[i])
+  sampleName <- unique(smp.list[[i]]@meta.data$sampleName)
+  smp.list[[i]] <- RenameCells(smp.list[[i]],add.cell.id = sampleName)
+  
+  if(opt$norm_method != "sctransform") {
+    smp.list[[i]] <- NormalizeData(object = smp.list[[i]], verbose = FALSE)
+    smp.list[[i]] <- FindVariableFeatures(object = smp.list[[i]], selection.method = "vst", 
+                                           nfeatures = 2000, verbose = FALSE)
+  } else {
+    smp.list[[i]] <- SCTransform(smp.list[[i]],vars.to.regress = corrections,verbose = T)
+  }
+}
+
+
+smp.anchors <- FindIntegrationAnchors(object.list = smp.list, dims = 1:opt$num_dim_CCA)
+
+if(opt$reusePca) {
+  print("Re use pca")
+  smp.combined <- IntegrateData(anchorset = smp.anchors, weight.reduction = "pca", dims = 1:opt$num_dim_weight)
+}
+
+smp.combined <- IntegrateData(anchorset = smp.anchors, dims = 1:opt$num_dim_weight)
+
+
+# Run the standard workflow for visualization and clustering
+#Here certainly need to reuse SCTransform on combined data if norm_method = sct
+
+
+classicSeuratWorkflow <- function(smp.combined, corrections,outdir) {
+  dir.create(outdir,recursive = T)
+  smp.combined <- ScaleData(object = smp.combined, verbose = T,vars.to.regress = corrections)
+  smp.combined <- RunPCA(object = smp.combined, npcs = opt$num_dim_integrated, verbose = FALSE)
+  
+  
+  png(paste(outdir,"/ElbowPlot.png",sep =""))
+  ElbowPlot(object = smp.combined,ndims = opt$num_dim_integrated)
+  dev.off()
+  
+  
+  # t-SNE UMAP and Clustering
+  smp.combined <- RunUMAP(object = smp.combined, reduction = "pca", dims = 1:opt$num_dim)
+  smp.combined <- RunTSNE(object = smp.combined, reduction = "pca", dims = 1:opt$num_dim)
+  smp.combined <- FindNeighbors(object = smp.combined, reduction = "pca", dims = 1:opt$num_dim)
+  smp.combined <- FindClusters(smp.combined, resolution = c(0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.2))
+  
+  colPrefix <- paste0(DefaultAssay(smp.combined),"_snn_res.")
+  
+  Idents(smp.combined) <- paste(colPrefix,opt$resolution,sep = "")
+  
+  smp.combined@meta.data$AGE <- "Old"
+  smp.combined@meta.data$AGE[which(smp.combined@meta.data$age == "2_months")] <- "Young"
+  
+  
+  
+  # Visualization of the samples
+  p1 <- DimPlot(object = smp.combined, reduction = "umap", group.by = "sampleName")
+  p2 <- DimPlot(object = smp.combined, reduction = "umap", label = TRUE)
+  p3 <- DimPlot(object = smp.combined, reduction = "umap", group.by = "AGE")
+  p4 <- DimPlot(object = smp.combined, reduction = "umap", group.by = "runDate")
+  p3tsne <- DimPlot(object = smp.combined, reduction = "tsne", group.by = "AGE")
+  p4tsne <- DimPlot(object = smp.combined, reduction = "tsne", group.by = "runDate")
+  
+  
+  
+  png(paste(outdir,"/umap_samples.png",sep =""),height = 800,width=1200)
+  grid.arrange(p1, p2,ncol = 2)
+  dev.off()
+  
+  png(paste(outdir,"/AGE.png",sep =""),height = 800,width=1200)
+  grid.arrange(p3, p3tsne,ncol = 2)
+  dev.off()
+  
+  png(paste(outdir,"/runDate.png",sep =""),height = 800,width=1200)
+  grid.arrange(p4, p4tsne,ncol = 2)
+  dev.off()
+  
+  png(paste(outdir,"/UmapClusters.png",sep =""),height = 800,width=1200)
+  DimPlot(object = smp.combined, reduction = "umap", label = TRUE)
+  dev.off()
+  
+  
+  p1tsne <- DimPlot(object = smp.combined, reduction = "tsne", group.by = "sampleName")
+  p2tsne <- DimPlot(object = smp.combined, reduction = "tsne", label = TRUE)
+  
+  png(paste(outdir,"/tsne_samples.png",sep =""),height = 800,width=1000)
+  grid.arrange(p1tsne, p2tsne,ncol = 2)
+  dev.off()
+  
+  #Visualistaoin one sample at a time
+  sampleNames <- unique(smp.combined@meta.data$sampleName) 
+  dir.create(paste(outdir,"/samples/umap/",sep = ""),recursive = T)
+  dir.create(paste(outdir,"/samples/tsne/",sep = ""),recursive = T)
+  
+  #samplePlotList <- list()
+  for (s in sampleNames) {
+    png(paste(outdir,"/samples/umap/",s,".png",sep = ""))
+    plot(DimPlot(object = smp.combined, cells= grep(s, smp.combined@meta.data$sampleName), reduction = "umap") + ggtitle(s))
+    dev.off()
+    png(paste(outdir,"/samples/tsne/",s,".png",sep = ""))
+    plot(DimPlot(object = smp.combined, cells= grep(s, smp.combined@meta.data$sampleName), reduction = "tsne") + ggtitle(s))
+    dev.off()
+  }
+  
+  
+  umapListRes <- list()
+  tsneListRes <- list()
+  for (r in c(0.6,0.8,1,1.2)) {
+    umapListRes[[as.character(r)]] <- DimPlot(smp.combined,
+                                              reduction = "umap",
+                                              label = T,
+                                              group.by = paste(colPrefix,r,sep="")) + 
+      NoLegend() +
+      ggtitle(paste("res",r))
+    
+    tsneListRes[[as.character(r)]] <- DimPlot(smp.combined,
+                                              reduction = "tsne",
+                                              label = T,
+                                              group.by = paste(colPrefix,r,sep="")) + 
+      NoLegend() +
+      ggtitle(paste("res",r))
+  }
+  
+  png(paste(outdir,"/tsne_different_res.png",sep = ""),height = 800,width = 800)
+  grid.arrange(tsneListRes[[1]],tsneListRes[[2]],tsneListRes[[3]],tsneListRes[[4]])
+  dev.off()
+  
+  png(paste(outdir,"/umap_different_res.png",sep = ""),height = 800,width = 800)
+  grid.arrange(umapListRes[[1]],umapListRes[[2]],umapListRes[[3]],umapListRes[[4]])
+  dev.off()
+  
+  
+  
+  smp.combined@meta.data$numclust <- smp.combined@meta.data[,paste(colPrefix,opt$resolution,sep = "")]
+  
+  
+  #Check for unwanted source of variation UMAP
+  pPhases <- DimPlot(smp.combined,group.by = "phases")
+  if (!is.null(smp.combined@meta.data$predicted)) {
+    pPred <- DimPlot(smp.combined,group.by = "predicted")
+  } else {
+    pPred <-  DimPlot(smp.combined)
+  }
+  pUMI <- FeaturePlot(smp.combined, "nCount_RNA")
+  pMito <- FeaturePlot(smp.combined, "percentMito")
+  
+  png(paste(outdir,"/umap_factors.png",sep = ""),height = 800,width = 800)
+  grid.arrange(pPhases,pPred,pUMI,pMito)
+  dev.off()
+  
+  #Check for unwanted source of variation tSNE
+  pPhases <- DimPlot(smp.combined,group.by = "phases",reduction = "tsne")
+  if (!is.null(smp.combined@meta.data$predicted)) {
+    pPred <- DimPlot(smp.combined,group.by = "predicted",reduction = "tsne")
+  } else {
+    pPred <-  DimPlot(smp.combined,reduction = "tsne")
+  }
+  pUMI <- FeaturePlot(smp.combined, "nCount_RNA",reduction = "tsne")
+  pMito <- FeaturePlot(smp.combined, "percentMito",reduction = "tsne")
+  
+  png(paste(outdir,"/tsne_factors.png",sep = ""),height = 800,width = 800)
+  grid.arrange(pPhases,pPred,pUMI,pMito)
+  dev.off()
+  return(smp.combined)
+}
+
+print("Seurat workflow without integration")
+DefaultAssay(object = smp.combined) <- "RNA"
+smp.combined <- NormalizeData(object = smp.combined)
+smp.combined <- FindVariableFeatures(object = smp.combined,selection.method = "vst", nfeatures = 2000, verbose = T)
+
+if (!is.null(corrections)) {
+  print("Seurat workflow without correction")
+  classicSeuratWorkflow(smp.combined,correction = NULL,outdir = paste0(opt$outdir,"/SeuratMergingWoIntegrationWoCr/"))
+}
+
+classicSeuratWorkflow(smp.combined,corrections = corrections,outdir = paste0(opt$outdir,"/SeuratMergingWoIntegration/"))
+
+
+
+print("Seurat workflow with integration")
+
+DefaultAssay(object = smp.combined) <- "integrated"
+
+
+if (!is.null(corrections)) {
+  print("Seurat workflow without correction")
+  classicSeuratWorkflow(smp.combined,correction = NULL,outdir = paste0(opt$outdir,"/SeuratWithoutCr/"))
+}
+
+smp.combined <- classicSeuratWorkflow(smp.combined,corrections = corrections,outdir =opt$outdir)
+
+
+
+#AGE prop for each cluster 
+age_clust <- getAGEPropPerClustBarplot(smp.combined)
+#sample prop for each cluster
+sample_clust <- getSamplePropPerClustBarplot(smp.combined)
+#runDate prop for each cluster
+runDate_clust <- getRunDatePropPerClustBarplot(smp.combined)
+
+png(paste(opt$outdir,"/propPerClust.png",sep = ""))
+grid.arrange(age_clust,sample_clust,runDate_clust,
+             DimPlot(object = smp.combined, reduction = "tsne", label = TRUE))
+dev.off()
+
+
+## saving results
+
+saveRDS(smp.combined,paste(opt$outdir,"/combined.rds",sep =""))
+
+
+
+
+
+
diff --git a/R_src/SignacMotifAnnotCL.R b/R_src/SignacMotifAnnotCL.R
new file mode 100644
index 0000000000000000000000000000000000000000..6fb5896188d8e40fc1fdf03cc66f805ae576465c
--- /dev/null
+++ b/R_src/SignacMotifAnnotCL.R
@@ -0,0 +1,324 @@
+#-----------------------------------------------------#
+#           Signac Motif analysis tutorial
+#-----------------------------------------------------#
+
+suppressMessages(library(getopt))
+suppressMessages(library(Signac))
+suppressMessages(library(Seurat))
+suppressMessages(library(JASPAR2020))
+suppressMessages(library(TFBSTools))
+suppressMessages(library(BSgenome.Mmusculus.UCSC.mm10))
+suppressMessages(library(ggplot2))
+# library(patchwork)
+
+
+##-------------------------------------------------------------------------##
+## Option list
+##-------------------------------------------------------------------------##
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'inputRDS',  'i', 1, "character", "REQUIRED: 10X data prepared as seurat/signac object.",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)',
+  'species', 's', 1, "numeric", "Species ID number for the motif DB, human = 9606, mouse = 10090 (default human)",
+  'assay', 'a', 1, "character", "Name of the assay that contain the atac data (default 'peaks')",
+  'cluster', 'c', 1, "character", "Name of the cluster identity for the markers",
+  'IdentName', 'n', 1, "character", "Idents new name '+' separated",
+  'IdentLevel', 'l', 1, "character", "Levels of the new Ident '+' separated",
+  'integrated', 'I', 1, "logical", "Is the RDS an integrated dataset or not"
+), byrow=TRUE, ncol=5);
+
+opt = getopt(spec)
+
+# if help was asked, print a friendly message
+# and exit with a non-zero error code
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputRDS)) {
+  cat("Help message")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+if (is.null(opt$outdir)) {
+  opt$outdir = "./"
+}
+
+if (is.null(opt$species)){
+  opt$species <- 9606
+}
+
+if(is.null(opt$assay)){
+  opt$assay <- "peaks"
+}
+
+# opt$inputRDS <- "/shared/projects/scRNA_HSPC_Aging/GMP_PRARA/output/TransferRnaAtac/coembedAll.rds"
+# opt$IdentName <- "Neu1+Neu3+Neu2+NeuRA1+Rep+NeuRA2"
+# opt$IdentLevel <- "Rep+Neu1+Neu2+Neu3+NeuRA1+NeuRA2"
+# opt$species <- 9606
+# opt$cluster <- "numclust"
+
+
+defAssay <- opt$assay
+
+outdir <- opt$outdir
+
+dir.create(outdir, recursive = T)
+
+#------------------------------------#
+#      Load the motif dataBase
+#------------------------------------#
+print("Load JASPAR DB")
+# Get a list of motif position frequency matrices from the JASPAR database
+# species = 9606 is human species id while the tutorial is using mice dataset
+pfm <- getMatrixSet(
+  x = JASPAR2020,
+  opts = list(species = opt$species, all_versions = FALSE)
+)
+
+#------------------------------------#
+#  Read seurat and prepare the data
+#------------------------------------#
+
+seurat <- readRDS(opt$inputRDS)
+
+DefaultAssay(seurat) <- defAssay
+
+Idents(seurat) <- opt$cluster
+
+print("Entering analysis loop")
+print(paste("opt$integrated == ", opt$integrated, sep = ""))
+if(opt$integrated == TRUE){
+  # Prepare data 
+  # Rename the cluster with real name for an easier analysis
+  new.ident.name <- strsplit(opt$IdentName, split = "\\+")[[1]]
+  
+  names(new.ident.name) <- levels(seurat)
+  
+  seurat <- RenameIdents(seurat, new.ident.name)
+  seurat@meta.data$FinalCluster <- Idents(seurat)
+  
+  ident_levels <- strsplit(opt$IdentLevel, split = "\\+")[[1]]
+  
+  seurat@meta.data$FinalCluster <- factor(seurat@meta.data$FinalCluster, levels = ident_levels)
+  
+  Idents(seurat) <- "FinalCluster"
+  
+  # Subset only ATAC data
+  Idents(seurat) <- "condType"
+  seurat_atac <- subset(seurat, idents = grep(pattern = "ATAC", x = unique(Idents(seurat)), value = T))
+  seurat_atac[[defAssay]] <-  as.ChromatinAssay(seurat_atac[[defAssay]], seqinfo = "mm10")
+  
+  Idents(seurat_atac) <- "FinalCluster"
+
+  # Adding motif information
+  seurat_atac <- AddMotifs(object = seurat_atac, genome = BSgenome.Mmusculus.UCSC.mm10, pfm = pfm)
+  
+  motif_correl <- seurat_atac@assays$peaks@motifs@motif.names 
+  motif_correl.df <- data.frame(do.call("rbind", motif_correl))
+  names(motif_correl.df)[1] <- "Motif_Name"
+  motif_correl.df$Motif_ID <- rownames(motif_correl.df)
+  
+  write.table(x = motif_correl.df, file = paste0(outdir, "/motif_name_table.tsv"), sep = "\t", row.names = F, col.names = T, quote = F)
+  
+  #----------------------------#
+  #  Method 1 : Find DA region
+  #----------------------------#
+  # dir.create(paste0(outdir, "/Overrep_Motif"), recursive = T)
+  # print("Starting method 1")
+  # ## Treatment impact
+  # dir.create(paste0(outdir, "/Overrep_Motif/Treatment"), recursive = T)
+  # # FindMarkers using Logistic regression
+  # Idents(seurat_atac) <- "condition"
+  # 
+  # da_peaks_treatment <- FindMarkers(seurat_atac, ident.1 = "Treated", ident.2 = "Control", only.pos = T, test.use = "LR", latent.vars = "nCount_peaks")
+  # da_peaks_treatment.sig <- rownames(da_peaks_treatment[da_peaks_treatment$p_val < 0.005, ])
+  # 
+  # # Calculate a motif score for each of the most DA region
+  # enriched_motifs_treatment <- FindMotifs(object = seurat_atac, features = da_peaks_treatment.sig)
+  # motif_plot_treatment <- MotifPlot(seurat_atac, head(enriched_motifs_treatment$motif, 6))
+  # 
+  # ggsave(paste0(outdir, "/Overrep_Motif/Treatment", "/Top_motif_treatment", '.png'), plot = motif_plot_treatment, device = 'png', path = NULL,
+  #        scale = 1, width = 20, height = 15, units = 'cm', dpi = 300)
+  # 
+  # write.table(da_peaks_treatment, file = paste0(outdir, "/Overrep_Motif/Treatment", "/DA_genomic_regions_treatment", ".tsv"), 
+  #             sep = "\t", col.names = T, row.names = T, quote = F)
+  # write.table(enriched_motifs_treatment, file = paste0(outdir, "/Overrep_Motif/Treatment", "/Motif_enrichment_treatment", ".tsv"), 
+  #             sep = "\t", col.names = T, row.names = T, quote = F)
+  # print("Treatment analysis done")
+  # 
+  # ## Cluster impact
+  # dir.create(paste0(outdir, "/Overrep_Motif/Cluster"), recursive = T)
+  # 
+  # Idents(seurat_atac) <- "FinalCluster"
+  # 
+  # print(unique(seurat_atac@meta.data$FinalCluster))
+  # # BiocParallel::register(BiocParallel::SerialParam())
+  # # da_peaks_cluster <- FindAllMarkers(seurat_atac, only.pos = T, test.use = "LR", latent.vars = "nCount_peaks")
+  # da_peaks_cluster.list <- list()
+  # for(cluster in unique(seurat_atac$FinalCluster)){
+  #   print(paste("Find markers cluster", cluster, sep = " "))
+  #   if(nrow(seurat_atac@meta.data[seurat_atac$FinalCluster %in% cluster,]) >3){
+  #     da_peaks_cluster.list[[cluster]] <- FindMarkers(seurat_atac, ident.1 = cluster, only.pos = T, test.use = "LR", latent.vars = "nCount_peaks")
+  #     da_peaks_cluster.list[[cluster]]$Cluster <- cluster
+  #     da_peaks_cluster.list[[cluster]]$peaks <- rownames(da_peaks_cluster.list[[cluster]])
+  #   } else{
+  #     print("Not enough cells")
+  #   }
+  # }
+  # 
+  # da_peaks_cluster.db <- do.call("rbind", da_peaks_cluster.list)
+  # print("FindMarkers cluster done")
+  # 
+  # enriched_motifs_cluster.list <- list()
+  # for(cluster in unique(da_peaks_cluster.db$Cluster)){
+  #   print(paste(cluster, "FindMotif", sep = " "))
+  #   da_peaks_cluster.sig <- da_peaks_cluster.db[which(da_peaks_cluster.db$p_val < 0.005 & da_peaks_cluster.db$Cluster == cluster),]
+  #   da_peaks_cluster.sig.name <- da_peaks_cluster.sig$peaks
+  #   enriched_motifs_cluster.list[[paste0(cluster, "_motifs")]] <- FindMotifs(seurat_atac, features = da_peaks_cluster.sig.name)
+  #   enriched_motifs_cluster.list[[paste0(cluster, "_motifs")]]$Cluster <- cluster
+  #   
+  #   # motif_plot <- MotifPlot(atac_small, motifs = head(enriched_motifs_cluster.list[[paste0(cluster, "_motifs")]]$motif, 6))
+  #   # ggsave(paste0(outdir, "/Overrep_Motif/Cluster", "/Top_motif_cluster_", cluster, '.png'), plot = motif_plot, device = 'png', path = NULL,
+  #   #        scale = 1, width = 20, height = 15, units = 'cm', dpi = 300)
+  # }
+  # enriched_motifs_cluster.df <- do.call("rbind", enriched_motifs_cluster.list)
+  # 
+  # write.table(da_peaks_cluster.db, file = paste0(outdir, "/Overrep_Motif/Cluster", "/Motif_enrichment_cluster", ".tsv"), 
+  #             sep = "\t", col.names = T, row.names = T, quote = F)
+  # write.table(enriched_motifs_cluster.df, file = paste0(outdir, "/Overrep_Motif/Cluster", "/Motif_enrichment_cluster", ".tsv"), 
+  #             sep = "\t", col.names = T, row.names = T, quote = F)
+  # print("cluster impact done")
+  #---------------------------------------#
+  #  Method 2 : Computing motif activity
+  #---------------------------------------#
+  dir.create(paste0(outdir, "/Motif_activity"), recursive = T)
+  
+  print("Starting method 2")
+  # Computing motif per cell motif activity score
+  # Try this for error with multicore
+  BiocParallel::register(BiocParallel::SerialParam())
+  seurat_atac <- RunChromVAR(object = seurat_atac, genome = BSgenome.Mmusculus.UCSC.mm10)
+  
+  DefaultAssay(seurat_atac) <- "chromvar"
+  print("End chromvar")
+  
+  # Find differential activity in motif per treatment
+  dir.create(paste0(outdir, "/Motif_activity/Treatment"), recursive = T)
+  print("Starting treatment analysis")
+  
+  Idents(seurat_atac) <- "condition"
+  diff_act_treatment <- FindMarkers(object = seurat_atac, ident.1 = "Treated", ident.2 = "Control", test.use = "LR", latent.vars = "nCount_peaks")
+  diff_act_treatment$Motif_Name <- motif_correl.df[rownames(diff_act_treatment), "Motif_Name"]
+  
+  top_motif_treatment <- MotifPlot(object = seurat_atac, motifs = head(rownames(diff_act_treatment), 6), assay = "peaks")
+  
+  write.table(diff_act_treatment, file = paste0(outdir, "/Motif_activity/Treatment", "/Motif_enrich_treatment", ".tsv"), 
+              sep = "\t", row.names = T, col.names = T, quote = F)
+  
+  ggsave(paste0(outdir, "/Motif_activity/Treatment", "/Top_motif_treatment", '.png'), plot = top_motif_treatment, device = 'png', path = NULL,
+         scale = 1, width = 20, height = 15, units = 'cm', dpi = 300)
+  print("End treatment analysis")
+  
+  # Find differential activity in motif per cluster
+  dir.create(paste0(outdir, "/Motif_activity/Cluster"), recursive = T)
+  
+  print("Starting cluster analysis")
+  Idents(seurat_atac) <- "FinalCluster"
+  diff_act_cluster <- FindAllMarkers(object = seurat_atac, only.pos = T, test.use = "LR", latent.vars = "nCount_peaks")
+  
+  for(cluster in unique(diff_act_cluster$cluster)){
+    top_motif <- diff_act_cluster[diff_act_cluster$cluster %in% cluster,]
+    top_motif.name <- head(top_motif$gene, 6)
+    
+    top_motif.plot <- MotifPlot(object = seurat_atac, motifs = top_motif.name, assay = "peaks")
+    ggsave(paste0(outdir, "/Motif_activity/Cluster", "/Top_motif_cluster_", cluster, '.png'), plot = top_motif.plot, device = 'png', path = NULL,
+           scale = 1, width = 20, height = 15, units = 'cm', dpi = 300)
+  }
+  
+
+  
+  # rownames(diff_act_cluster) <- diff_act_cluster$gene
+  print("Adding Motif_name to table output")
+  diff_act_cluster.list <- list()
+  for(cluster in unique(diff_act_cluster$cluster)){
+    diff_act_cluster.temp <- diff_act_cluster[diff_act_cluster$cluster %in% cluster,]
+    rownames(diff_act_cluster.temp) <- diff_act_cluster.temp$gene
+    diff_act_cluster.temp$Motif_Name <- motif_correl.df[rownames(diff_act_cluster.temp), "Motif_Name"]
+    diff_act_cluster.list[[cluster]] <- diff_act_cluster.temp
+  }
+  diff_act_cluster.final <- do.call("rbind", diff_act_cluster.list)
+  
+  write.table(diff_act_cluster.final, file = paste0(outdir, "/Motif_activity/Cluster", "/Motif_enrich_cluster", ".tsv"), 
+              sep = "\t", row.names = T, col.names = T, quote = F)
+  print("End cluster analysis")
+  
+  # Analysing motif markers of difference between pop IN a cluster (ex : Clus1_Treated vs Clust1_Control)
+  print("Starting the treatment per cluster analysis")
+  dir.create(paste0(outdir, "/Motif_activity/Treatment_per_clust"), recursive = T)
+  
+  seurat_atac$cluster_cond <- paste(seurat_atac$condition, seurat_atac$FinalCluster, sep = "_")
+  Idents(seurat_atac) <- "cluster_cond"
+
+  diff_act_cluster_cond.list <- list()
+  for(cluster in unique(seurat_atac$FinalCluster)){
+    print(paste("Treated_vs_Control", cluster, sep = "_"))
+    
+    if(nrow(seurat_atac@meta.data[seurat_atac$cluster_cond %in% paste("Control", cluster, sep = "_"),]) >3 && 
+       nrow(seurat_atac@meta.data[seurat_atac$cluster_cond %in% paste("Treated", cluster, sep = "_"),])){
+      print("Enough cells for analysis")
+      diff_act_cluster_cond.list[[paste("Treated_vs_Control", cluster, sep = "_")]] <- FindMarkers(object = seurat_atac, ident.1 = paste("Treated", cluster, sep = "_"),
+                                                                                                   ident.2 = paste("Control", cluster, sep = "_"),
+                                                                                                   test.use = 'LR', latent.vars = 'nCount_peaks')
+      
+      diff_act_cluster_cond.list[[paste("Treated_vs_Control", cluster, sep = "_")]]$Comparison <- "Treated_vs_Control"
+      diff_act_cluster_cond.list[[paste("Treated_vs_Control", cluster, sep = "_")]]$Cluster <- cluster
+      diff_act_cluster_cond.list[[paste("Treated_vs_Control", cluster, sep = "_")]]$Motif_ID <- rownames(diff_act_cluster_cond.list[[paste("Treated_vs_Control", cluster, sep = "_")]])
+      diff_act_cluster_cond.list[[paste("Treated_vs_Control", cluster, sep = "_")]]$Motif_Name <- motif_correl.df[rownames(diff_act_cluster_cond.list[[paste("Treated_vs_Control", cluster, sep = "_")]]), "Motif_Name"]
+    }else{
+      print("Not enough cells")
+    }
+  }
+  diff_act_cluster_cond.df <- do.call("rbind", diff_act_cluster_cond.list)
+  
+  print("Plotting top motif for treated and control per cluster")
+  for(cluster in unique(diff_act_cluster_cond.df$Cluster)){
+    top_motif <- diff_act_cluster_cond.df[diff_act_cluster_cond.df$Cluster %in% cluster,]
+    top_motif.name <- head(top_motif$Motif_ID, 6)
+    bot_motif.name <- tail(top_motif$Motif_ID, 6)
+    
+    top_motif.plot <- MotifPlot(object = seurat_atac, motifs = top_motif.name, assay = "peaks")
+    bot_motif.plot <- MotifPlot(object = seurat_atac, motifs = bot_motif.name, assay = "peaks")
+    ggsave(paste0(outdir, "/Motif_activity/Treatment_per_clust", "/Top_motif_treated_cluster_", cluster, '.png'), plot = top_motif.plot, device = 'png', path = NULL,
+           scale = 1, width = 20, height = 15, units = 'cm', dpi = 300)
+    ggsave(paste0(outdir, "/Motif_activity/Treatment_per_clust", "/Top_motif_control_cluster_", cluster, '.png'), plot = bot_motif.plot, device = 'png', path = NULL,
+           scale = 1, width = 20, height = 15, units = 'cm', dpi = 300)
+  }
+  
+  write.table(diff_act_cluster_cond.df, file = paste0(outdir, "/Motif_activity/Treatment_per_clust", "/Motif_enrich_cluster", ".tsv"), 
+              sep = "\t", row.names = T, col.names = T, quote = F)
+  print("End of the treatment per cluster analysis")
+  
+  saveRDS(seurat_atac, file = paste0(outdir, "/atac_motif", '.rds'))
+  
+} else if(opt$integrated == FALSE){
+  print("Only computing motif activity score")
+  
+  # Adding motif information
+  seurat <- AddMotifs(object = seurat, genome = BSgenome.Mmusculus.UCSC.mm10, pfm = pfm)
+  
+  motif_correl <- seurat@assays$peaks@motifs@motif.names 
+  motif_correl.df <- data.frame(do.call("rbind", motif_correl))
+  names(motif_correl.df)[1] <- "Motif_Name"
+  motif_correl.df$Motif_ID <- rownames(motif_correl.df)
+  
+  write.table(x = motif_correl.df, file = paste0(outdir, "/motif_name_table.tsv"), sep = "\t", row.names = F, col.names = T, quote = F)
+  
+  # Calculate motif score
+  BiocParallel::register(BiocParallel::SerialParam())
+  seurat <- RunChromVAR(object = seurat, genome = BSgenome.Mmusculus.UCSC.mm10)
+  
+  saveRDS(seurat, file = paste0(outdir, "/atac_motif", ".rds"))
+}
+
+
diff --git a/R_src/SignacMotifAnnotCL_v2.R b/R_src/SignacMotifAnnotCL_v2.R
new file mode 100644
index 0000000000000000000000000000000000000000..328896f7c90d3b5b3f1162748ed24a7a55ed7525
--- /dev/null
+++ b/R_src/SignacMotifAnnotCL_v2.R
@@ -0,0 +1,250 @@
+#-----------------------------------------------------#
+#           Signac Motif analysis tutorial
+#-----------------------------------------------------#
+
+suppressMessages(library(getopt))
+suppressMessages(library(Signac))
+suppressMessages(library(Seurat))
+suppressMessages(library(JASPAR2020))
+suppressMessages(library(TFBSTools))
+suppressMessages(library(BSgenome.Mmusculus.UCSC.mm10))
+suppressMessages(library(ggplot2))
+# library(patchwork)
+
+
+##-------------------------------------------------------------------------##
+## Option list
+##-------------------------------------------------------------------------##
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'inputRDS',  'i', 1, "character", "REQUIRED: 10X data prepared as seurat/signac object.",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)',
+  'species', 's', 1, "numeric", "Species ID number for the motif DB, human = 9606, mouse = 10090 (default human)",
+  'assay', 'a', 1, "character", "Name of the assay that contain the atac data (default 'peaks')",
+  'cluster', 'c', 1, "character", "Name of the cluster identity for the markers",
+  'IdentName', 'n', 1, "character", "Idents new name '+' separated",
+  'IdentLevel', 'l', 1, "character", "Levels of the new Ident '+' separated",
+  'integrated', 'I', 1, "logical", "Is the RDS an integrated dataset or not"
+), byrow=TRUE, ncol=5);
+
+opt = getopt(spec)
+
+# if help was asked, print a friendly message
+# and exit with a non-zero error code
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputRDS)) {
+  cat("Help message")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+if (is.null(opt$outdir)) {
+  opt$outdir = "./"
+}
+
+if (is.null(opt$species)){
+  opt$species <- 9606
+}
+
+if(is.null(opt$assay)){
+  opt$assay <- "peaks"
+}
+
+# opt$inputRDS <- "/shared/projects/scRNA_HSPC_Aging/GMP_PRARA/output/TransferRnaAtac/coembedAll.rds"
+# opt$IdentName <- "Neu2+Rep+Neu3+NeuRA2+NeuRA1+Neu1"
+# opt$IdentLevel <- "Rep+Neu1+Neu2+Neu3+NeuRA1+NeuRA2"
+# opt$species <- 9606
+# opt$cluster <- "predicted.id"
+
+
+defAssay <- opt$assay
+
+outdir <- opt$outdir
+
+dir.create(outdir, recursive = T)
+
+#------------------------------------#
+#      Load the motif dataBase
+#------------------------------------#
+print("Load JASPAR DB")
+# Get a list of motif position frequency matrices from the JASPAR database
+# species = 9606 is human species id while the tutorial is using mice dataset
+pfm <- getMatrixSet(
+  x = JASPAR2020,
+  opts = list(species = opt$species, all_versions = FALSE)
+)
+
+#------------------------------------#
+#  Read seurat and prepare the data
+#------------------------------------#
+
+seurat <- readRDS(opt$inputRDS)
+
+DefaultAssay(seurat) <- defAssay
+
+Idents(seurat) <- opt$cluster
+
+print("Entering analysis loop")
+print(paste("opt$integrated == ", opt$integrated, sep = ""))
+if(opt$integrated == TRUE){
+  # Prepare data 
+  # Rename the cluster with real name for an easier analysis
+  new.ident.name <- strsplit(opt$IdentName, split = "\\+")[[1]]
+  
+  names(new.ident.name) <- levels(seurat)
+  
+  seurat <- RenameIdents(seurat, new.ident.name)
+  seurat@meta.data$FinalCluster <- Idents(seurat)
+  
+  ident_levels <- strsplit(opt$IdentLevel, split = "\\+")[[1]]
+  
+  seurat@meta.data$FinalCluster <- factor(seurat@meta.data$FinalCluster, levels = ident_levels)
+  
+  Idents(seurat) <- "FinalCluster"
+  
+  seurat_atac <- seurat
+  # Adding motif information
+  seurat_atac <- AddMotifs(object = seurat_atac, genome = BSgenome.Mmusculus.UCSC.mm10, pfm = pfm)
+  
+  motif_correl <- seurat_atac@assays$peaks@motifs@motif.names 
+  motif_correl.df <- data.frame(do.call("rbind", motif_correl))
+  names(motif_correl.df)[1] <- "Motif_Name"
+  motif_correl.df$Motif_ID <- rownames(motif_correl.df)
+  
+  write.table(x = motif_correl.df, file = paste0(outdir, "/motif_name_table.tsv"), sep = "\t", row.names = F, col.names = T, quote = F)
+  
+  #---------------------------------------#
+  #  Method 2 : Computing motif activity
+  #---------------------------------------#
+  dir.create(paste0(outdir, "/Motif_activity"), recursive = T)
+  
+  print("Starting method 2")
+  # Computing motif per cell motif activity score
+  # Try this for error with multicore
+  BiocParallel::register(BiocParallel::SerialParam())
+  seurat_atac <- RunChromVAR(object = seurat_atac, genome = BSgenome.Mmusculus.UCSC.mm10)
+  
+  DefaultAssay(seurat_atac) <- "chromvar"
+  print("End chromvar")
+  
+  # Find differential activity in motif per treatment
+  dir.create(paste0(outdir, "/Motif_activity/Treatment"), recursive = T)
+  print("Starting treatment analysis")
+  
+  Idents(seurat_atac) <- "condition"
+  diff_act_treatment <- FindMarkers(object = seurat_atac, ident.1 = "Treated", ident.2 = "Control", test.use = "LR", latent.vars = "nCount_peaks")
+  diff_act_treatment$Motif_Name <- motif_correl.df[rownames(diff_act_treatment), "Motif_Name"]
+  
+  top_motif_treatment <- MotifPlot(object = seurat_atac, motifs = head(rownames(diff_act_treatment), 6), assay = "peaks")
+  
+  write.table(diff_act_treatment, file = paste0(outdir, "/Motif_activity/Treatment", "/Motif_enrich_treatment", ".tsv"), 
+              sep = "\t", row.names = T, col.names = T, quote = F)
+  
+  ggsave(paste0(outdir, "/Motif_activity/Treatment", "/Top_motif_treatment", '.png'), plot = top_motif_treatment, device = 'png', path = NULL,
+         scale = 1, width = 20, height = 15, units = 'cm', dpi = 300)
+  print("End treatment analysis")
+  
+  # Find differential activity in motif per cluster
+  dir.create(paste0(outdir, "/Motif_activity/Cluster"), recursive = T)
+  
+  print("Starting cluster analysis")
+  Idents(seurat_atac) <- "FinalCluster"
+  diff_act_cluster <- FindAllMarkers(object = seurat_atac, only.pos = T, test.use = "LR", latent.vars = "nCount_peaks")
+  
+  for(cluster in unique(diff_act_cluster$cluster)){
+    top_motif <- diff_act_cluster[diff_act_cluster$cluster %in% cluster,]
+    top_motif.name <- head(top_motif$gene, 6)
+    
+    top_motif.plot <- MotifPlot(object = seurat_atac, motifs = top_motif.name, assay = "peaks")
+    ggsave(paste0(outdir, "/Motif_activity/Cluster", "/Top_motif_cluster_", cluster, '.png'), plot = top_motif.plot, device = 'png', path = NULL,
+           scale = 1, width = 20, height = 15, units = 'cm', dpi = 300)
+  }
+  
+  
+  
+  # rownames(diff_act_cluster) <- diff_act_cluster$gene
+  print("Adding Motif_name to table output")
+  diff_act_cluster.list <- list()
+  for(cluster in unique(diff_act_cluster$cluster)){
+    diff_act_cluster.temp <- diff_act_cluster[diff_act_cluster$cluster %in% cluster,]
+    rownames(diff_act_cluster.temp) <- diff_act_cluster.temp$gene
+    diff_act_cluster.temp$Motif_Name <- motif_correl.df[rownames(diff_act_cluster.temp), "Motif_Name"]
+    diff_act_cluster.list[[cluster]] <- diff_act_cluster.temp
+  }
+  diff_act_cluster.final <- do.call("rbind", diff_act_cluster.list)
+  
+  write.table(diff_act_cluster.final, file = paste0(outdir, "/Motif_activity/Cluster", "/Motif_enrich_cluster", ".tsv"), 
+              sep = "\t", row.names = T, col.names = T, quote = F)
+  print("End cluster analysis")
+  
+  # Analysing motif markers of difference between pop IN a cluster (ex : Clus1_Treated vs Clust1_Control)
+  print("Starting the treatment per cluster analysis")
+  dir.create(paste0(outdir, "/Motif_activity/Treatment_per_clust"), recursive = T)
+  
+  seurat_atac$cluster_cond <- paste(seurat_atac$condition, seurat_atac$FinalCluster, sep = "_")
+  Idents(seurat_atac) <- "cluster_cond"
+  
+  diff_act_cluster_cond.list <- list()
+  for(cluster in unique(seurat_atac$FinalCluster)){
+    print(paste("Treated_vs_Control", cluster, sep = "_"))
+    
+    if(nrow(seurat_atac@meta.data[seurat_atac$cluster_cond %in% paste("Control", cluster, sep = "_"),]) >3 && 
+       nrow(seurat_atac@meta.data[seurat_atac$cluster_cond %in% paste("Treated", cluster, sep = "_"),])){
+      print("Enough cells for analysis")
+      diff_act_cluster_cond.list[[paste("Treated_vs_Control", cluster, sep = "_")]] <- FindMarkers(object = seurat_atac, ident.1 = paste("Treated", cluster, sep = "_"),
+                                                                                                   ident.2 = paste("Control", cluster, sep = "_"),
+                                                                                                   test.use = 'LR', latent.vars = 'nCount_peaks')
+      
+      diff_act_cluster_cond.list[[paste("Treated_vs_Control", cluster, sep = "_")]]$Comparison <- "Treated_vs_Control"
+      diff_act_cluster_cond.list[[paste("Treated_vs_Control", cluster, sep = "_")]]$Cluster <- cluster
+      diff_act_cluster_cond.list[[paste("Treated_vs_Control", cluster, sep = "_")]]$Motif_ID <- rownames(diff_act_cluster_cond.list[[paste("Treated_vs_Control", cluster, sep = "_")]])
+      diff_act_cluster_cond.list[[paste("Treated_vs_Control", cluster, sep = "_")]]$Motif_Name <- motif_correl.df[rownames(diff_act_cluster_cond.list[[paste("Treated_vs_Control", cluster, sep = "_")]]), "Motif_Name"]
+    }else{
+      print("Not enough cells")
+    }
+  }
+  diff_act_cluster_cond.df <- do.call("rbind", diff_act_cluster_cond.list)
+  
+  print("Plotting top motif for treated and control per cluster")
+  for(cluster in unique(diff_act_cluster_cond.df$Cluster)){
+    top_motif <- diff_act_cluster_cond.df[diff_act_cluster_cond.df$Cluster %in% cluster,]
+    top_motif.name <- head(top_motif$Motif_ID, 6)
+    bot_motif.name <- tail(top_motif$Motif_ID, 6)
+    
+    top_motif.plot <- MotifPlot(object = seurat_atac, motifs = top_motif.name, assay = "peaks")
+    bot_motif.plot <- MotifPlot(object = seurat_atac, motifs = bot_motif.name, assay = "peaks")
+    ggsave(paste0(outdir, "/Motif_activity/Treatment_per_clust", "/Top_motif_treated_cluster_", cluster, '.png'), plot = top_motif.plot, device = 'png', path = NULL,
+           scale = 1, width = 20, height = 15, units = 'cm', dpi = 300)
+    ggsave(paste0(outdir, "/Motif_activity/Treatment_per_clust", "/Top_motif_control_cluster_", cluster, '.png'), plot = bot_motif.plot, device = 'png', path = NULL,
+           scale = 1, width = 20, height = 15, units = 'cm', dpi = 300)
+  }
+  
+  write.table(diff_act_cluster_cond.df, file = paste0(outdir, "/Motif_activity/Treatment_per_clust", "/Motif_enrich_cluster", ".tsv"), 
+              sep = "\t", row.names = T, col.names = T, quote = F)
+  print("End of the treatment per cluster analysis")
+  
+  saveRDS(seurat_atac, file = paste0(outdir, "/atac_motif", '.rds'))
+  
+} else if(opt$integrated == FALSE){
+  print("Only computing motif activity score")
+  
+  # Adding motif information
+  seurat <- AddMotifs(object = seurat, genome = BSgenome.Mmusculus.UCSC.mm10, pfm = pfm)
+  
+  motif_correl <- seurat@assays$peaks@motifs@motif.names 
+  motif_correl.df <- data.frame(do.call("rbind", motif_correl))
+  names(motif_correl.df)[1] <- "Motif_Name"
+  motif_correl.df$Motif_ID <- rownames(motif_correl.df)
+  
+  write.table(x = motif_correl.df, file = paste0(outdir, "/motif_name_table.tsv"), sep = "\t", row.names = F, col.names = T, quote = F)
+  
+  # Calculate motif score
+  BiocParallel::register(BiocParallel::SerialParam())
+  seurat <- RunChromVAR(object = seurat, genome = BSgenome.Mmusculus.UCSC.mm10)
+  
+  saveRDS(seurat, file = paste0(outdir, "/atac_motif", ".rds"))
+}
+
+
diff --git a/R_src/SignacOneSampleCL.R b/R_src/SignacOneSampleCL.R
new file mode 100644
index 0000000000000000000000000000000000000000..0aef397096d7969629a4f67f1f72753ec4952bb1
--- /dev/null
+++ b/R_src/SignacOneSampleCL.R
@@ -0,0 +1,151 @@
+##------------------------------------------------------------------------------
+## L. Herault
+##------------------------------------------------------------------------------
+
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+
+suppressMessages(library(Signac))
+suppressMessages(library(Seurat))
+suppressMessages(library(GenomeInfoDb))
+suppressMessages(library(ggplot2))
+suppressMessages(library(patchwork))
+set.seed(1234)
+
+
+# Analysis of seurat 3 integration and clusternig workflow
+
+## -----------------------------------------------------------------------------
+## Command line args
+## -----------------------------------------------------------------------------
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'filename',    'i', 1, 'character', 'input filtered_peak_bc_matrix.h5',
+  'metafile',    'm', 1, 'character', 'input metadata file eg. singlecell.csv',
+  'fragments',    'f', 1, 'character', 'fragments.tsv.gz file',
+  'genome',    'g',  1, 'character', 'genome,  corresponding R libraries have to be installed (eg EnsDb.Mmusculus.v79) for mm10',
+  'minCell',     'c', 1, 'numeric', 'min cell number by feature (default 10)',
+  'minFeature',  'e', 1, 'numeric', "min feature number per cells (default 200)",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)',
+  "tfTested",         't',1, "character", "TF tested list (eg TF with a motif in the scenic database",
+  "regulonJson",       'r',1, "character", "Regulon main Json file",
+  "regulonJsonSupp", "s", 1, "character", "Regulon supp Json files (separated by +)",
+  "subConditionName", "n", 1, "character", "correspondong names (separated by +)"
+  
+), byrow=TRUE, ncol=5);
+
+## default settings
+
+if (is.null(opt$outdir)) {
+  opt$outdir = "./"
+} 
+
+if (is.null(opt$minCell)) {
+  opt$minCell = 10
+} 
+
+if (is.null(opt$minFeature)) {
+  opt$minFeature = 200
+} 
+
+# Printing option before running
+
+for (o in names(opt)) {
+  print(paste(o,":", opt[[o]]))
+}
+
+## Load data
+counts <- Read10X_h5(filename = opt$filename)
+
+metadata <- read.csv(
+  file = opt$metafile,
+  header = TRUE,
+  row.names = 1
+)
+
+chrom_assay <- CreateChromatinAssay(
+  counts = counts,
+  sep = c(":", "-"),
+  genome = opt$genome,
+  fragments = opt$fragments,
+  min.cells = opt$minCell,
+  min.features = opt$minFeature
+)
+
+signac <- CreateSeuratObject(
+  counts = chrom_assay,
+  assay = "peaks",
+  meta.data = metadata
+)
+
+if(opt$genome == "mm10") {
+  suppressMessages(library(EnsDb.Mmusculus.v79))
+  ensdb = EnsDb.Mmusculus.v79
+}
+
+# extract gene annotations from EnsDb
+annotations <- GetGRangesFromEnsDb(ensdb = ensdb)
+
+# change to UCSC style since the data was mapped to hg19
+seqlevelsStyle(annotations) <- 'UCSC'
+genome(annotations) <- opt$genome
+
+# add the gene information to the object
+Annotation(signac) <- annotations
+
+signac <- NucleosomeSignal(object = signac)
+
+# Quality control and cells filtering
+
+# compute TSS enrichment score per cell
+signac <- TSSEnrichment(object = signac, fast = FALSE)
+
+# add blacklist ratio and fraction of reads in peaks
+signac$pct_reads_in_peaks <- signac$peak_region_fragments / signac$passed_filters * 100
+signac$blacklist_ratio <- signac$blacklist_region_fragments / signac$peak_region_fragments
+
+# Plot TSS enrichment, separating cells in two groups
+signac$high.tss <- ifelse(signac$TSS.enrichment > 2, 'High', 'Low')
+
+png(pasteO(opt$oudir,'/TSS_enrichment.png'),width = 800,height = 800)
+TSSPlot(signac, group.by = 'high.tss') + NoLegend()
+dev.off()
+
+# Plot FragmentHistogram on chr1, separating cells in two groups depending on nucleosome signal
+
+signac$nucleosome_group <- ifelse(signac$nucleosome_signal > 2, 'NS > 2', 'NS < 2')
+
+png(pasteO(opt$oudir,'/fragmentHistogram.png'),width = 800,height = 800)
+FragmentHistogram(object = signac,region = "chr1-1-100000000",group.by = 'nucleosome_group')
+dev.off()
+
+# VlnPlot of control metrics
+png(pasteO(opt$oudir,'/vlncontrolMetrics.png'),width = 1400,height = 500)
+
+VlnPlot(
+  object = signac,
+  features = c('pct_reads_in_peaks', 'peak_region_fragments',
+               'TSS.enrichment', 'blacklist_ratio', 'nucleosome_signal'),
+  pt.size = 0.001,
+  ncol = 5
+)
+dev.off()
+
+# Filter outliers
+signac <- subset(
+  x = signac,
+  subset = peak_region_fragments > 3000 &
+    peak_region_fragments < 20000 &
+    pct_reads_in_peaks > 15 &
+    blacklist_ratio < 0.05 &
+    nucleosome_signal < 4 &
+    TSS.enrichment > 2
+)
+
+signac
+
+
+
+
diff --git a/R_src/actinnSeuratCL.R b/R_src/actinnSeuratCL.R
new file mode 100644
index 0000000000000000000000000000000000000000..b9e1d0b3468bd8c7595261c42d125cfbd11face9
--- /dev/null
+++ b/R_src/actinnSeuratCL.R
@@ -0,0 +1,188 @@
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+print("Loading library")
+
+library(Seurat)
+library(getopt)
+library(scales)
+library(RColorBrewer)
+library(plyr)
+library(ggplot2)
+library(gridExtra)
+library(cowplot)
+library(reshape2)
+library(cowplot)
+library(gridExtra)
+
+source("R_src/do_consensus.R")
+source("R_src/funForAnalysisSeurat.R")
+
+theme_set(theme_classic())
+
+
+
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'inputRDS',  'i', 1, "character", "REQUIRED: 10X data ordered by monocle (.RDS generated by orderCL.R).",
+  'actinnPathGMP', 'g', 1, "character", "Path to actinn multiple run directory",
+  'actinnPathProg', 'p', 1, "character", "Path to actinn multiple run directory",
+  'signatures', 's', 1, "character", "Signature files from nestorowa analysis",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)'
+), byrow=TRUE, ncol=5);
+
+opt = getopt(spec)
+
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputRDS)) {
+  cat("Gene filtering of a gbm cds ordered. For scenic use, two filters")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+if (is.null(opt$outdir)) {
+  opt$outdir = "./"
+}
+outdir <- opt$outdir
+
+# Data for test
+# opt$inputRDS <- "/shared/projects/scRNA_HSPC_Aging/GMP_PRARA/output/RNA/streamAnalysisSeurat/seuratPseudotime.rds"
+# opt$actinnPathGMP <- "/shared/projects/scRNA_HSPC_Aging/GMP_PRARA/output/RNA/multiple_Actinn_Nestorowa_gmp/"
+# opt$actinnPathProg <- "/shared/projects/scRNA_HSPC_Aging/GMP_PRARA/output/RNA/multiple_Actinn_Nestorowa_prog/"
+# opt$signatures <- "/shared/projects/scRNA_HSPC_Aging/GMP_PRARA/output/RNA/Nestorowa/progenitors_signature.csv"
+
+dir.create(outdir, recursive = T)
+
+print("Loading files")
+# Read Seurat file
+seurat <- readRDS(opt$inputRDS)
+
+# Read and do actinn consensus
+actinn_consensus_gmp <- do_consensus(opt$actinnPathGMP)
+actinn_consensus_prog <- do_consensus(opt$actinnPathProg)
+
+# Read signatures list
+signatures <- read.table(opt$signatures, sep = ",", header = T)
+
+## Color palette
+
+colorTreatment <- c("#664CFF", "#FF8000")
+colorCluster <- c("#E69F00", "#CC79A7", "#0072B2", "#009E73", "#D55E00", "#56B4E9")
+
+
+# Add actinn result as meta data 
+
+seurat$cellType_Actinn_GMP <- actinn_consensus_gmp[rownames(seurat@meta.data),"Consensus"] 
+seurat$cellType_Actinn_GMP <- factor(seurat$cellType_Actinn_GMP, levels = c("GMP_broad", "CMP_broad", "MEP_broad"))
+
+seurat$cellType_Actinn_Prog <- actinn_consensus_prog[rownames(seurat@meta.data),"Consensus"] 
+seurat$cellType_Actinn_Prog <- factor(seurat$cellType_Actinn_Prog, levels = c("Prog", "HSPC"))
+
+# Calculate signature score
+
+for(sig in 1:ncol(signatures)){
+  print(names(signatures[sig]))
+  seurat <- AddModuleScore(seurat, features = list(signatures[,sig]), name = names(signatures[sig]))
+  colnames(seurat@meta.data)[which(colnames(seurat@meta.data) == paste0(names(signatures[sig]), 1))] <- names(signatures[sig])
+}
+
+seurat[["Signature_nesto"]] <- CreateAssayObject(data = t(as.matrix(seurat@meta.data[,names(signatures)])))
+
+
+## Plotting results of Actinn transfer
+# Cell size UMAP 0.4
+
+## Prog transfer and signature ##
+## We only look at Prog signature, the transfer result is not convincing
+
+# UMAP 
+# UMAP_actinn_prog <- DimPlot(seurat, group.by = "cellType_Actinn_prog") + ggtitle("")
+
+
+# Violin plot
+Prog_all_melt <- melt(seurat@meta.data[,c("Prog_signature_up", "Prog_signature_down")])
+Prog_Ctrl_melt <- melt(seurat@meta.data[seurat$condition %in% "Control", c("Prog_signature_up", "Prog_signature_down")])
+Prog_RA_melt <- melt(seurat@meta.data[seurat$condition %in% "Treated", c("Prog_signature_up", "Prog_signature_down")])
+
+
+
+violinplot_prog <- ggplot(Prog_all_melt, aes(x = variable, y = value)) +
+  geom_violin(fill = "grey") + theme(legend.position = "none", axis.title.x = element_blank()) +
+  ylab("Signature score") + scale_x_discrete(labels = c("Prog_signature_up" = "Prog", "Prog_signature_down" = "HSPC"))
+
+violinplot_Ctrl_prog <- ggplot(Prog_Ctrl_melt, aes(x = variable, y = value)) +
+  geom_violin(fill = "grey") + theme(legend.position = "none", axis.title.x = element_blank()) +
+  ylab("Signature score") + scale_x_discrete(labels = c("Prog_signature_up" = "Prog", "Prog_signature_down" = "HSPC"))
+
+violinplot_RA_prog <- ggplot(Prog_RA_melt, aes(x = variable, y = value)) +
+  geom_violin(fill = "grey") + theme(legend.position = "none", axis.title.x = element_blank()) +
+  ylab("Signature score") + scale_x_discrete(labels = c("Prog_signature_up" = "Prog", "Prog_signature_down" = "HSPC"))
+
+
+ggsave(paste0(outdir, 'Vln_Prog_HSPC', '.png'), plot = violinplot_prog, device = 'png', path = NULL,
+       scale = 1, width = 15, height = 10, units = 'cm', dpi = 300)
+ggsave(paste0(outdir, 'Vln_Ctrl_Prog_HSPC', '.png'), plot = violinplot_Ctrl_prog, device = 'png', path = NULL,
+       scale = 1, width = 15, height = 10, units = 'cm', dpi = 300)
+ggsave(paste0(outdir, 'Vln_RA_Prog_HSPC', '.png'), plot = violinplot_RA_prog, device = 'png', path = NULL,
+       scale = 1, width = 15, height = 10, units = 'cm', dpi = 300)
+## GMP transfer and signature visualisation ##
+
+# Violin Plot
+GMP_all_melt <- melt(seurat@meta.data[,c("GMP_signature_up", "CMP_signature_up", "MEP_signature_up")])
+GMP_Ctrl_melt <- melt(seurat@meta.data[seurat$condition %in% "Control", c("GMP_signature_up", "CMP_signature_up", "MEP_signature_up")])
+GMP_RA_melt <- melt(seurat@meta.data[seurat$condition %in% "Treated", c("GMP_signature_up", "CMP_signature_up", "MEP_signature_up")])
+
+violinplot_gmp <- ggplot(GMP_all_melt, aes(x = variable, y = value)) +
+  geom_violin(fill = "grey") + theme(legend.position = "none", axis.title.x = element_blank()) +
+  ylab("Signature score") + scale_x_discrete(labels = c("GMP_signature_up" = "GMP", "CMP_signature_up" = "CMP", "MEP_signature_up" = "MEP"))
+violinplot_Ctrl_gmp <- ggplot(GMP_Ctrl_melt, aes(x = variable, y = value)) +
+  geom_violin(fill = "grey") + theme(legend.position = "none", axis.title.x = element_blank()) +
+  ylab("Signature score") + scale_x_discrete(labels = c("GMP_signature_up" = "GMP", "CMP_signature_up" = "CMP", "MEP_signature_up" = "MEP"))
+violinplot_RA_gmp <- ggplot(GMP_RA_melt, aes(x = variable, y = value)) +
+  geom_violin(fill = "grey") + theme(legend.position = "none", axis.title.x = element_blank()) +
+  ylab("Signature score") + scale_x_discrete(labels = c("GMP_signature_up" = "GMP", "CMP_signature_up" = "CMP", "MEP_signature_up" = "MEP"))
+
+
+ggsave(paste0(outdir, 'Vln_GMP_CMP_MEP', '.png'), plot = violinplot_gmp, device = 'png', path = NULL,
+       scale = 1, width = 15, height = 10, units = 'cm', dpi = 300)
+ggsave(paste0(outdir, 'Vln_Ctrl_GMP_CMP_MEP', '.png'), plot = violinplot_Ctrl_gmp, device = 'png', path = NULL,
+       scale = 1, width = 15, height = 10, units = 'cm', dpi = 300)
+ggsave(paste0(outdir, 'Vln_RA_GMP_CMP_MEP', '.png'), plot = violinplot_RA_gmp, device = 'png', path = NULL,
+       scale = 1, width = 15, height = 10, units = 'cm', dpi = 300)
+# UMAP
+
+# Can't plot after subseting on MEP_broad because there is only one cell
+Idents(seurat) <- "cellType_Actinn_GMP"
+
+seurat_gmp <- subset(seurat, idents = "GMP_broad")
+seurat_cmp <- subset(seurat, idents = "CMP_broad")
+# seurat_mep <- subset(seurat, idents = "MEP_broad")
+
+Idents(seurat) <- "FinalCluster"
+
+UMAP_actinn_GMP <- DimPlot(seurat_gmp, group.by = "cellType_Actinn_GMP", pt.size = 0.4) +
+  theme(axis.title = element_blank(), axis.text = element_blank(), legend.position = "none") +
+  scale_color_manual(values = "grey")
+
+UMAP_actinn_CMP <- DimPlot(seurat_cmp, group.by = "cellType_Actinn_GMP", pt.size = 0.4) +
+  theme(axis.title = element_blank(), axis.text = element_blank(), legend.position = "none") +
+  scale_color_manual(values = "grey")
+
+# UMAP_actinn_MEP <- DimPlot(seurat_mep, group.by = "cellType_Actinn_GMP")
+
+UMAP_actinn_gmp_split <- DimPlot(seurat, group.by = "cellType_Actinn_GMP", split.by = "cellType_Actinn_GMP", pt.size = 0.4) +
+  theme(axis.title = element_blank(), axis.text = element_blank(), legend.position = "none") + ggtitle("") +
+  scale_color_manual(values = c("grey", "grey", "grey"))
+
+
+ggsave(paste0(outdir, 'UMAP_GMP_split', '.png'), plot = UMAP_actinn_gmp_split, device = 'png', path = NULL,
+       scale = 1, width = 25, height = 15, units = 'cm', dpi = 300)
+ggsave(paste0(outdir, 'UMAP_GMP', '.png'), plot = UMAP_actinn_GMP, device = 'png', path = NULL,
+       scale = 1, width = 15, height = 15, units = 'cm', dpi = 300)
+ggsave(paste0(outdir, 'UMAP_CMP', '.png'), plot = UMAP_actinn_CMP, device = 'png', path = NULL,
+       scale = 1, width = 15, height = 15, units = 'cm', dpi = 300)
+
+
+saveRDS(seurat, paste0(outdir, "seurat_actinn", ".rds"))
\ No newline at end of file
diff --git a/R_src/assignSurfaceMarkerSeuratCL.R b/R_src/assignSurfaceMarkerSeuratCL.R
new file mode 100644
index 0000000000000000000000000000000000000000..8160f87ecf34a7fce840af44a7cff7d5f57ef922
--- /dev/null
+++ b/R_src/assignSurfaceMarkerSeuratCL.R
@@ -0,0 +1,89 @@
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+
+library(Seurat)
+library(monocle)
+library(plyr)
+library(ggplot2)
+library(getopt)
+
+## -----------------------------------------------------------------------------
+## Command line args
+## -----------------------------------------------------------------------------
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  "inputSeurat",  "i",1, "character", "input seurat object with cluster column names numclust",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)',
+  "surfaceMarkers", "s", 1, "character", "List of surface marker split by \\+",
+  "namePop", "n", 1, "character", "Name of the double pos population"
+), byrow=TRUE, ncol=5);
+
+
+opt = getopt(spec)
+
+# if help was asked, print a friendly message
+# and exit with a non-zero error code
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputSeurat)) {
+  cat("Create influence graph from regulon table")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+if(is.null(opt$outdir)){
+  opt$outdir <- "./"
+}
+
+theme_set(theme_classic())
+
+# Testing option
+opt$outdir <- "/shared/projects/scRNA_HSPC_Aging/GMP_PRARA/output/RNA/Seurat4_integration/Analysis/"
+opt$inputSeurat <- readRDS("/shared/projects/scRNA_HSPC_Aging/GMP_PRARA/output/RNA/regulonAnalysis/seuratAUC_2.rds")
+opt$surfaceMarkers <- "Ly6g+Itgam"
+opt$namePop <- "Diff"
+
+outdir <- opt$outdir
+
+# Read surface markers 
+surfaceMarkers <- strsplit(opt$surfaceMarkers, split = "\\+")[[1]]
+
+# Open seurat then save it as CDS object
+seurat <- readRDS(opt$inputSeurat)
+seurat_cds <- as.CellDataSet(seurat, assay = "RNA")
+
+
+# Classify cells with surface markers
+cth <- newCellTypeHierarchy()
+
+SM_1_id <- row.names(subset(fData(seurat_cds), gene_short_name == surfaceMarkers[1]))
+SM_2_id <- row.names(subset(fData(seurat_cds), gene_short_name == surfaceMarkers[1]))
+
+cth <- addCellType(cth, opt$namePop, classify_func = 
+                     function(x){ x[SM_1_id,] >=1 & x[SM_2_id,] >=1})
+
+seurat_cds <- classifyCells(seurat_cds, cth, 0.1)
+
+# Add cellType information to the seurat object
+seurat@meta.data$CellType <- pData(seurat_cds)[rownames(seurat@meta.data), "CellType"]
+
+# Barplot cellType distribution
+summary_diff <- ddply(seurat@meta.data,~FinalCluster + CellType + condition, nrow)
+summary_diff$Diff_cell <- factor(summary_diff$CellType, levels = c("Unknown", "Diff"))
+
+cellType_control_bp <- ggplot(data = data.frame(summary_diff[summary_diff$condition %in% "Control",]), aes(x = FinalCluster, y = V1, fill = CellType)) + 
+  geom_bar(stat = "identity", position = position_dodge()) +
+  ylab(label = "") + xlab(label = "") + coord_flip()
+
+cellType_treated_bp <- ggplot(data = data.frame(summary_diff[summary_diff$condition %in% "Treated",]), aes(x = FinalCluster, y = V1, fill = CellType)) + 
+  geom_bar(stat = "identity", position = position_dodge()) +
+  ylab(label = "") + xlab(label = "") + coord_flip()
+
+ggsave(paste0(outdir, "BarPlot/", 'BP_CS_Markers_', opt$namePop, '_control', '.png'), plot = cellType_control_bp, device = 'png', path = NULL, 
+       scale = 1, width = 10, height = 10, units = 'cm', dpi = 300)
+ggsave(paste0(outdir, "BarPlot/", 'BP_CS_Markers_', opt$namePop, '_treated', '.png'), plot = cellType_treated_bp, device = 'png', path = NULL, 
+       scale = 1, width = 10, height = 10, units = 'cm', dpi = 300)
+
+saveRDS(seurat, paste0(outdir, "seurat_CT", ".rds"))
diff --git a/R_src/castle.R b/R_src/castle.R
new file mode 100644
index 0000000000000000000000000000000000000000..cca3dcaf06ed85830930cca142969934c50bcfad
--- /dev/null
+++ b/R_src/castle.R
@@ -0,0 +1,215 @@
+castle <- function(source_seurat,target_seurat,outdir = "./") {
+  set.seed(2018)
+  BREAKS=c(-1, 0, 1, 6, Inf)
+  nFeatures = 100
+  
+  dir.create(outdir,showWarnings = F)
+
+  # 1. Load datasets in scater format: loaded files expected to contain "Large SingleCellExperiment" object
+  
+  
+  # 1.1 convert from seurat (be careful seurat 2) to sce object if seurat object as input
+  source <- Convert(from = source_seurat, to = "sce")
+  target <- Convert(from = target_seurat, to = "sce")
+
+
+  
+  
+  ds1 = t(exprs(source)) 
+  ds2 = t(exprs(target)) 
+  sourceCellTypes = as.factor(colData(source)[,"library_id"])
+
+  # 2. Unify sets, excluding low expressed genes
+  source_n_cells_counts = apply(exprs(source), 1, function(x) { sum(x > 0) } )
+  print("source OK")
+  target_n_cells_counts = apply(exprs(target), 1, function(x) { sum(x > 0) } )
+  common_genes = intersect( rownames(source)[source_n_cells_counts>10], 
+                            rownames(target)[target_n_cells_counts>10]
+  )
+  remove(source_n_cells_counts, target_n_cells_counts)
+  ds1 = ds1[, colnames(ds1) %in% common_genes]
+  ds2 = ds2[, colnames(ds2) %in% common_genes]
+  ds = rbind(ds1[,common_genes], ds2[,common_genes])
+  isSource = c(rep(TRUE,nrow(ds1)), rep(FALSE,nrow(ds2)))
+  remove(ds1, ds2)
+  
+  # 3. Highest mean in both source and target
+  topFeaturesAvg = colnames(ds)[order(apply(ds, 2, mean), decreasing = T)]
+  
+  # 4. Highest mutual information in source
+  topFeaturesMi = names(sort(apply(ds[isSource,],2,function(x) { compare(cut(x,breaks=BREAKS),sourceCellTypes,method = "nmi") }), decreasing = T))
+  
+  # 5. Top n genes that appear in both mi and avg
+  selectedFeatures = union(head(topFeaturesAvg, nFeatures) , head(topFeaturesMi, nFeatures) )
+  
+  # 6. remove correlated features
+  tmp = cor(as.matrix(ds[,selectedFeatures]), method = "pearson")
+  tmp[!lower.tri(tmp)] = 0
+  selectedFeatures = selectedFeatures[apply(tmp,2,function(x) any(x < 0.9))]
+  remove(tmp)
+  
+  # 7,8. Convert data from continous to binned dummy vars
+  # break datasets to bins
+  dsBins = apply(ds[, selectedFeatures], 2, cut, breaks= BREAKS)
+  # use only bins with more than one value
+  nUniq = apply(dsBins, 2, function(x) { length(unique(x)) })
+  # convert to dummy vars
+  ds = model.matrix(~ . , as.data.frame(dsBins[,nUniq>1]))
+  remove(dsBins, nUniq)
+  
+  
+  # 9. Classify
+  train = runif(nrow(ds[isSource,]))<0.8
+  #slightly different setup for multiclass and binary classification
+  if (length(unique(sourceCellTypes)) > 2) {
+    xg=xgboost(data=ds[isSource,][train, ] ,
+               label=as.numeric(sourceCellTypes[train])-1,
+               objective="multi:softmax", num_class=length(unique(sourceCellTypes)),
+               eta=0.7 , nthread=5, nround=20, verbose=0,
+               gamma=0.001, max_depth=5, min_child_weight=10)
+  } else {
+    xg=xgboost(data=ds[isSource,][train, ] ,
+               label=as.numeric(sourceCellTypes[train])-1,
+               eta=0.7 , nthread=5, nround=20, verbose=0,
+               gamma=0.001, max_depth=5, min_child_weight=10)
+  }
+  
+  
+  
+  # 10. Predict
+  predictedClasses = predict(xg, ds[!isSource, ])
+  
+  target_seurat@meta.data$predicted <- predictedClasses
+ 
+  
+  conv <- data.frame(cellType = unique(sourceCellTypes),numCellTypes=unique(as.numeric(sourceCellTypes[train]))-1)
+  
+  
+  for (num in conv$numCellTypes) {
+    target_seurat@meta.data$predicted[target_seurat@meta.data$predicted==num] <- as.character(conv[which(conv$numCellType== num),"cellType"])
+  }    
+  
+  
+  
+  target_seurat@meta.data$predicted <- factor(target_seurat@meta.data$predicted,levels = c("LTHSC","STHSC", "MPP2", "MPP3"))
+  png(paste(outdir,"/cellTypeLearned_tsne.png", sep = ""),height = 800,width = 800)
+  TSNEPlot(target_seurat,group.by="predicted")
+  dev.off()
+  
+  
+  
+  return(target_seurat)
+}
+
+castleSeurat3 <- function(source_seurat,target_seurat,outdir = "./") {
+  set.seed(2018)
+  BREAKS=c(-1, 0, 1, 6, Inf)
+  nFeatures = 100
+  
+  dir.create(outdir,showWarnings = F)
+  
+  # 1. Load datasets in scater format: loaded files expected to contain "Large SingleCellExperiment" object
+  
+  
+  # 1.1 convert from seurat (be careful seurat 3) to sce object if seurat object as input
+  source <- as.SingleCellExperiment(source_seurat, to = "sce",data = "logcounts")
+  target <- as.SingleCellExperiment(target_seurat, to = "sce",data = "logcounts")
+  
+  
+  
+  
+  ds1 = t(exprs(source)) 
+  ds2 = t(exprs(target)) 
+  sourceCellTypes = as.factor(colData(source)[,"library_id"])
+  
+  # 2. Unify sets, excluding low expressed genes
+  source_n_cells_counts = apply(exprs(source), 1, function(x) { sum(x > 0) } )
+  print("source OK")
+  target_n_cells_counts = apply(exprs(target), 1, function(x) { sum(x > 0) } )
+  common_genes = intersect( rownames(source)[source_n_cells_counts>10], 
+                            rownames(target)[target_n_cells_counts>10]
+  )
+  remove(source_n_cells_counts, target_n_cells_counts)
+  ds1 = ds1[, colnames(ds1) %in% common_genes]
+  ds2 = ds2[, colnames(ds2) %in% common_genes]
+  ds = rbind(ds1[,common_genes], ds2[,common_genes])
+  isSource = c(rep(TRUE,nrow(ds1)), rep(FALSE,nrow(ds2)))
+  remove(ds1, ds2)
+  
+  # 3. Highest mean in both source and target
+  topFeaturesAvg = colnames(ds)[order(apply(ds, 2, mean), decreasing = T)]
+  
+  # 4. Highest mutual information in source
+  topFeaturesMi = names(sort(apply(ds[isSource,],2,function(x) { compare(cut(x,breaks=BREAKS),sourceCellTypes,method = "nmi") }), decreasing = T))
+  
+  # 5. Top n genes that appear in both mi and avg
+  selectedFeatures = union(head(topFeaturesAvg, nFeatures) , head(topFeaturesMi, nFeatures) )
+  
+  # 6. remove correlated features
+  tmp = cor(as.matrix(ds[,selectedFeatures]), method = "pearson")
+  tmp[!lower.tri(tmp)] = 0
+  selectedFeatures = selectedFeatures[apply(tmp,2,function(x) any(x < 0.9))]
+  remove(tmp)
+  
+  # 7,8. Convert data from continous to binned dummy vars
+  # break datasets to bins
+  dsBins = apply(ds[, selectedFeatures], 2, cut, breaks= BREAKS)
+  # use only bins with more than one value
+  nUniq = apply(dsBins, 2, function(x) { length(unique(x)) })
+  # convert to dummy vars
+  ds = model.matrix(~ . , as.data.frame(dsBins[,nUniq>1]))
+  remove(dsBins, nUniq)
+  
+  
+  # 9. Classify
+  train = runif(nrow(ds[isSource,]))<0.8
+  #slightly different setup for multiclass and binary classification
+  if (length(unique(sourceCellTypes)) > 2) {
+    xg=xgboost(data=ds[isSource,][train, ] ,
+               label=as.numeric(sourceCellTypes[train])-1,
+               objective="multi:softmax", num_class=length(unique(sourceCellTypes)),
+               eta=0.7 , nthread=5, nround=20, verbose=0,
+               gamma=0.001, max_depth=5, min_child_weight=10)
+  } else {
+    xg=xgboost(data=ds[isSource,][train, ] ,
+               label=as.numeric(sourceCellTypes[train])-1,
+               eta=0.7 , nthread=5, nround=20, verbose=0,
+               gamma=0.001, max_depth=5, min_child_weight=10)
+  }
+  
+  
+  
+  # 10. Predict
+  predictedClasses = predict(xg, ds[!isSource, ])
+  
+  target_seurat@meta.data$predicted <- predictedClasses
+ 
+  
+  conv <- data.frame(cellType = unique(sourceCellTypes),numCellTypes=unique(as.numeric(sourceCellTypes[train]))-1)
+  
+  
+  for (num in conv$numCellTypes) {
+    target_seurat@meta.data$predicted[target_seurat@meta.data$predicted==num] <- as.character(conv[which(conv$numCellType== num),"cellType"])
+  }    
+  
+  
+  
+  target_seurat@meta.data$predicted <- factor(target_seurat@meta.data$predicted,levels = c("LTHSC","STHSC", "MPP2", "MPP3"))
+  png(paste(outdir,"/cellTypeLearned_tsne.png", sep = ""),height = 800,width = 800)
+  DimPlot(target_seurat,group.by ="predicted")
+  dev.off()
+  
+  
+  
+  return(target_seurat)
+}
+
+
+transfertToMonocle <- function(target_seurat,target_monocle,outdir = "./") {
+  pData(target_monocle)$predicted <- target_seurat@meta.data$predicted
+  png(paste(outdir,"/cellTypeLearned_ddrtree.png", sep = ""),height = 800,width = 800)
+  print(plot_cell_trajectory(target_monocle,color_by = "predicted"))
+  dev.off()
+  return(target_monocle)
+}
+
diff --git a/R_src/castleSeurat3CL.R b/R_src/castleSeurat3CL.R
new file mode 100644
index 0000000000000000000000000000000000000000..e2d8cc61e7d82e99afa53a47e5719bf70ae13e1e
--- /dev/null
+++ b/R_src/castleSeurat3CL.R
@@ -0,0 +1,59 @@
+suppressMessages(library(gProfileR))
+suppressMessages(library(RColorBrewer))
+require(scales)
+suppressMessages(library(monocle))
+suppressMessages(library(Seurat))
+suppressMessages(library(scater))
+suppressMessages(library(xgboost))
+suppressMessages(library(igraph))
+suppressMessages(library(getopt))
+
+source("R_src/getSigPerCells.R")
+source("R_src/Enrichment.R")
+source("R_src/castle.R")
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'sourceRDS',  'i', 1, "character", "REQUIRED: 10X data to trained classifier prepared as Seurat object (.RDS generated by RodriguezAnalysis.R).",
+  'targetRDS',  't', 1, "character", "REQUIRED: 10X data targeted prepared as seurat object (.RDS generated by SeuratCL.R (filtered, normalized)).",
+  'targetMonocleRDS', 'm', 1, "character", "REQUIRED: 10X data targeted prepared as monocle object (.RDS generated by seuratMonocleOrdering.R).",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)'
+), byrow=TRUE, ncol=5);
+
+opt = getopt(spec)
+
+if ( !is.null(opt$help) | is.null(opt$sourceRDS)) {
+  cat("Perform perform cell type assignation using a source dataset and castle XGradientBoost tool")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+
+source_seurat <- readRDS(opt$sourceRDS)
+target_seurat <- readRDS(opt$targetRDS)
+
+
+
+  seurat <- castleSeurat3(source_seurat,target_seurat,outdir = opt$outdir)
+  
+  if (!is.null(opt$targetMonocleRDS)) {
+    target_monocle <- readRDS(opt$targetMonocleRDS)
+    monocle <- transfertToMonocle(seurat,target_monocle,outdir = opt$outdir)
+    saveRDS(monocle,paste(opt$outdir,"/monocleTrainedCt.rds",sep =""))
+    
+  }
+  
+  png(paste(opt$outdir,"/CellTypeLearnedPropGG.png",sep = ""))
+  pie <- ggplot(seurat@meta.data,
+                aes(x = factor(1), fill = factor(predicted))) + geom_bar(width = 1)
+  pie <- pie + coord_polar(theta = "y") +
+    theme(axis.title.x = element_blank(), axis.title.y = element_blank())
+  print(pie)
+  dev.off()
+  
+  png(paste(opt$outdir,"/CellTypeLearnedProp.png",sep = ""))
+  pie(table(seurat@meta.data$predicted),labels = names(table(seurat@meta.data$predicted)),col = hue_pal()(4),main = paste("predicted prop"))
+  dev.off()
+  
+  saveRDS(seurat,paste(opt$outdir,"/seuratTrainedCt.rds",sep = ""))
+
diff --git a/R_src/computeDiffFun.R b/R_src/computeDiffFun.R
new file mode 100644
index 0000000000000000000000000000000000000000..e0c2cc69611dcbe98c298ed4451a121510fa1d84
--- /dev/null
+++ b/R_src/computeDiffFun.R
@@ -0,0 +1,218 @@
+#Functions to compute difference of feature in seurat object (because Seurat compute logFC)
+
+getExtAvgDiff <- function(row) {
+  res <- NA
+  if(row[1] > 0 & row[2] > 0) {
+    res <- min(c(row))
+  }
+  if(row[1] < 0 & row[2] < 0) {
+    res <- max(c(row))
+  }
+  return(res)
+}
+
+featureDiff <- function(seurat,cells.1,cells.2,feature) {
+  data <- GetAssayData(seurat,slot = "data")
+  total.diff <- mean(data[feature,cells.1]) - mean(data[feature,cells.2])
+  return(total.diff)
+} 
+
+
+getTrueDiff <- function (seurat,table,colIdent = "numclust",suffix = "",colTest="AGE") {
+  
+  if(!is.null(levels(seurat[[colTest]]))) {
+    factors <- levels(seurat@meta.data[,colTest])
+  } else {
+    factors <- unique(seurat@meta.data[,colTest])
+  }
+  factors <- as.vector(factors)
+
+  table[,paste("avg_diff",suffix,sep = "")] <- NA
+  
+  for (rm in rownames(table)) {
+    feature <- table[rm,"Gene"]
+    cells.1 <- colnames(seurat)[which(seurat[[colIdent]] == table[rm,colIdent] & seurat[[colTest]] == factors[1])]
+    cells.2 <- colnames(seurat)[which(seurat[[colIdent]] == table[rm,colIdent] & seurat[[colTest]] == factors[2])]
+    table[rm,paste("avg_diff",suffix,sep = "")] <- featureDiff(seurat,cells.1,cells.2,feature)
+  }
+  
+  return(table)
+}
+
+
+## Test functions
+
+FindMarkerPerClustGroupVar <- function(cluster,
+                                       hspc.combined,
+                                       condition ="AGE",        ## only var with two factors allowed
+                                       grouping.var = "platform",
+                                       max.cells.per.ident = Inf,
+                                       filterOnPadj = T,
+                                       logfc.threshold = 0.25,
+                                       min.pct = 0.1,
+                                       keepDiverging = F,
+                                       test.use = "wilcox",
+                                       identCol = "numclust",
+                                       pseudocount.use = 1,
+                                       computeTrueDiff= F) {
+  
+  clusterCondition <- paste0("cluster.",condition)
+  if(!is.null(levels(seurat@meta.data[,condition]))) {
+    factors <- levels(seurat@meta.data[,condition])
+  } else {
+    factors <- unique(seurat@meta.data[,condition])
+  }
+  
+  hspc.combined@meta.data[,clusterCondition] <- paste(hspc.combined@meta.data[,identCol],
+                                                      hspc.combined@meta.data[,condition], sep = "_")
+  
+  test <- table(hspc.combined@meta.data[,clusterCondition], hspc.combined@meta.data[,grouping.var])
+  groups = unique(hspc.combined@meta.data[,grouping.var])
+  
+  ident1 = paste0(cluster,"_",factors[1])
+  ident2 = paste0(cluster,"_",factors[2])
+  
+  
+  if( (test[ident1,groups[1]]>3 & test[ident1,groups[2]]>3) &
+      (test[ident2,groups[1]]>3&test[ident2,groups[2]]>3)) {
+    
+    Idents(object = hspc.combined) <- clusterCondition
+    
+    
+    
+    
+    markers <- FindConservedMarkers(hspc.combined,
+                                    assay = DefaultAssay(hspc.combined),
+                                    grouping.var = grouping.var,
+                                    test.use=test.use,
+                                    ident.1 = ident1,
+                                    ident.2 = ident2,
+                                    pseudocount.use = pseudocount.use,
+                                    min.pct = min.pct,
+                                    logfc.threshold = logfc.threshold,
+                                    max.cells.per.ident = max.cells.per.ident)
+    
+    print(dim(markers))
+    #Add a column the min abs(logFC) 
+    markers$min_avg_logFC <- NA
+    
+    if(!keepDiverging) {
+      #print("keep only markers with conserved logfc sign")
+      markers <- markers[which(markers[,paste0(groups[1],"_avg_logFC")]/markers[,paste0(group[2],"_avg_logFC")] > 0),]
+    } else {
+      #print("combined p_val of diverging markers between two batch are set to 1 and their min_avg_logfc to 0")
+      markers[which(markers[,paste0(groups[1],"_avg_logFC")]/markers[,paste0(group[2],"_avg_logFC")] > 0),"min_avg_logFC"] <- 0
+      markers[which(markers[,paste0(groups[1],"_avg_logFC")]/markers[,paste0(group[2],"_avg_logFC")] > 0),"minimump_p_val"] <- 1
+    }
+    
+    for (g in rownames(markers)) {
+      if(markers[g,paste0(groups[1],"_avg_logFC")] < 0) {
+        markers[g,"min_avg_logFC"] <- max(markers[g,c(paste0(groups[1],"_avg_logFC"),paste0(groups[2],"_avg_logFC"))])
+      } else {
+        markers[g,"min_avg_logFC"] <- min(markers[g,c(paste0(groups[1],"_avg_logFC"),paste0(groups[2],"_avg_logFC"))])
+      }
+    }
+    
+    markers$Cluster <- paste0(ident1,"_up")
+    markers$Cluster[which(markers$min_avg_logFC < 0)] <-  paste0(ident1,"_down")
+    markers$Gene <- rownames(markers)
+    markers <- markers[order(markers$min_avg_logFC),]
+    
+    if(computeTrueDiff) {
+      markers$numclust <- str_split_fixed(markers$Cluster,pattern = "_",n=3)[,1]
+      
+      
+      markers <- getTrueDiff(seurat[,which(seurat[[grouping.var]] == group[1])],
+                                  markers,
+                                  colIdent = "numclust",
+                                  colTest = condition,
+                                  suffix = paste0("_",group[1]))
+      
+      markers <- getTrueDiff(seurat[,which(seurat[[grouping.var]] == group[2])],
+                                  markers,
+                                  colIdent = "numclust",
+                                  colTest = condition,
+                                  suffix = paste0("_",group[2]))
+      
+      
+      markers$min_avg_diff <- apply(markers[,c(paste0("avg_diff_",group[1]),paste0("avg_diff_",group[2]))],
+                                    1,
+                                    FUN=getExtAvgDiff) 
+    }
+    
+    
+    if (filterOnPadj) {
+      markers <- markers[which(markers$A_p_val_adj < 0.05 & markers$B_p_val_adj < 0.05),]
+    }
+  }else {
+    markers <- NULL 
+  }
+  return(markers)
+  
+}
+
+FindMarkerPerClust <- function(cluster,
+                               hspc.combined,
+                               condition ="AGE",        ## only var with two factors allowed
+                               max.cells.per.ident = Inf,
+                               logfc.threshold = 0.25,
+                               min.pct = 0.1,
+                               test.use = "wilcox",
+                               identCol = "numclust",
+                               pseudocount.use = 1,
+                               computeTrueDiff = F) {
+  
+  clusterCondition <- paste0("cluster.",condition)
+  if(!is.null(levels(seurat@meta.data[,condition]))) {
+    factors <- levels(seurat@meta.data[,condition])
+  } else {
+    factors <- unique(seurat@meta.data[,condition])
+  }
+  
+  hspc.combined@meta.data[,clusterCondition] <- paste(hspc.combined@meta.data[,identCol],
+                                                      hspc.combined@meta.data[,condition], sep = "_")
+  
+  test <- table(hspc.combined@meta.data[,clusterCondition])
+  
+  ident1 = paste0(cluster,"_",factors[1])
+  ident2 = paste0(cluster,"_",factors[2])
+  
+  
+  if( test[ident1]>3 & test[ident2]>3) {
+    
+    Idents(object = hspc.combined) <- clusterCondition
+    
+    markers <- FindMarkers(hspc.combined,
+                           assay = DefaultAssay(hspc.combined),
+                           test.use=test.use,
+                           ident.1 = ident1,
+                           ident.2 = ident2,
+                           pseudocount.use = pseudocount.use,
+                           min.pct = min.pct,
+                           logfc.threshold = logfc.threshold,
+                           max.cells.per.ident = max.cells.per.ident)
+    
+    markers$Cluster <- paste0(ident1,"_up")
+    markers$Cluster[which(markers$avg_logFC < 0)] <-  paste0(ident1,"_down")
+    markers$Gene <- rownames(markers)
+    markers <- markers[order(markers$avg_logFC),]
+    
+    if(computeTrueDiff) {
+      markers$numclust <- str_split_fixed(markers$Cluster,pattern = "_",n=3)[,1]
+      
+      
+      markers <- getTrueDiff(seurat,
+                             markers,
+                             colIdent = "numclust",
+                             colTest = condition
+                             )
+      
+     }
+    
+  } else {
+    markers <- NULL 
+  }
+  return(markers)
+  
+}
+
diff --git a/R_src/data_preparation.R b/R_src/data_preparation.R
new file mode 100644
index 0000000000000000000000000000000000000000..70f735b86e0481e39574772038e25a770b8f6afe
--- /dev/null
+++ b/R_src/data_preparation.R
@@ -0,0 +1,133 @@
+# R function to prepare single cell data for monocle & Seurat
+
+getCellCyclePhases <- function(gbm_cds,outdir = "./") {
+  
+  dir.create(path = outdir,recursive = T,showWarnings = F)
+  gene_count_matrix <- as.matrix(exprs(gbm_cds))
+  set.seed(100)
+  mm.pairs <- readRDS(system.file("exdata", "mouse_cycle_markers.rds", package="scran"))
+  assignments <- cyclone(gene_count_matrix, mm.pairs, gene.names=rownames(gene_count_matrix))
+
+  ## add cell cycle phase to pData
+  pData(gbm_cds)$phases <- assignments$phases
+  pData(gbm_cds)$G1_score <- assignments$scores$G1
+  pData(gbm_cds)$G2M_score <- assignments$scores$G2M
+  pData(gbm_cds)$S_score <- assignments$scores$S
+  pData(gbm_cds)[which(pData(gbm_cds)$phases=="G1"),"phases"] <- "G1_G0"
+  pData(gbm_cds)[which(pData(gbm_cds)$phases=="G2M"),"phases"] <- "G2_M"
+  
+  
+  png(paste(outdir,"/cell_cycle_phases_assignment_on_umi.png",sep = ""))
+  plot(assignments$score$G1, assignments$score$G2M, 
+       xlab="G1 score", ylab="G2/M score", pch=16)
+  dev.off()
+  
+  print("Cell cycle phases added..")
+  return(gbm_cds)
+}
+
+getCellCyclePhasesSeurat <- function(seurat,outdir = "./") {
+  
+  dir.create(path = outdir,recursive = T,showWarnings = F)
+  gene_count_matrix <- as.matrix(GetAssayData(seurat,slot = 'counts',assay = "RNA"))
+  set.seed(100)
+  mm.pairs <- readRDS(system.file("exdata", "mouse_cycle_markers.rds", package="scran"))
+  assignments <- cyclone(gene_count_matrix, mm.pairs, gene.names=rownames(gene_count_matrix))
+  
+  ## add cell cycle phase to pData
+  seurat@meta.data$phases <- assignments$phases
+  seurat@meta.data$G1_score <- assignments$scores$G1
+  seurat@meta.data$G2M_score <- assignments$scores$G2M
+  seurat@meta.data$S_score <- assignments$scores$S
+  seurat@meta.data[which(seurat@meta.data$phases=="G1"),"phases"] <- "G1_G0"
+  seurat@meta.data[which(seurat@meta.data$phases=="G2M"),"phases"] <- "G2_M"
+  
+  
+  png(paste(outdir,"/cell_cycle_phases_assignment_on_umi.png",sep = ""))
+  plot(assignments$score$G1, assignments$score$G2M, 
+       xlab="G1 score", ylab="G2/M score", pch=16)
+  dev.off()
+  
+  print("Cell cycle phases added..")
+  return(seurat)
+}
+
+
+
+addPercentMitoch <- function(gbm_cds) {      #in fact this is fraction of mitochondrial transcripts
+  mitoGenes <- grep(pattern = "mt-",x=fData(gbm_cds)$gene_short_name,value = T)
+  mitoGenesEnsembl <- rownames(gbm_cds)[which(is.element(el = fData(gbm_cds)$gene_short_name,set = mitoGenes))]
+  percentMito <- Matrix::colSums(as.matrix(exprs(gbm_cds))[mitoGenesEnsembl, ])/Matrix::colSums(as.matrix(exprs(gbm_cds)))
+  pData(gbm_cds)$percentMito <- percentMito
+  return(gbm_cds)
+}
+
+
+#Filtering low-quality cells
+
+filterCells <- function(gbm_cds,outdir="./",num_cells_expressed=10,min_expr=0.1,propMitochFilter=NULL) {
+  
+  dir.create(path = outdir,recursive = T,showWarnings = F)
+  
+  #Set detection threshold
+  gbm_cds <- detectGenes(gbm_cds, min_expr = min_expr)
+  
+  #Define gene expressed as gene detected in more than n cell (only use for plotting distribution)
+  expressed_genes <- row.names(subset(fData(gbm_cds),            
+                                    num_cells_expressed >= num_cells_expressed))
+
+  #filtering cells as explained in monocle doc, cutting distribution tails
+  #eg. remove cells with no RNA or too much RNA
+
+  pData(gbm_cds)$Total_mRNAs <- Matrix::colSums(exprs(gbm_cds))
+  
+  png(paste(outdir,"/densityTotalmRNA_raw.png",sep =""))
+  qplot(Total_mRNAs, data = pData(gbm_cds), geom ="density")
+  dev.off()
+  
+  upper_bound <- 10^(mean(log10(pData(gbm_cds)$Total_mRNAs)) +
+                     2*sd(log10(pData(gbm_cds)$Total_mRNAs)))
+  lower_bound <- 10^(mean(log10(pData(gbm_cds)$Total_mRNAs)) -
+                     2*sd(log10(pData(gbm_cds)$Total_mRNAs)))
+
+  png(paste(outdir,"/densityWithFirstFilter.png",sep =""))
+  print(qplot(Total_mRNAs, data = pData(gbm_cds), geom ="density") + 
+          geom_vline(xintercept = lower_bound) + 
+          geom_vline(xintercept = upper_bound))
+  dev.off()
+  
+  gbm_cds <- gbm_cds[,pData(gbm_cds)$Total_mRNAs > lower_bound &
+                       pData(gbm_cds)$Total_mRNAs < upper_bound]
+
+  # redefining expressed gene after cells filtering
+  gbm_cds <- detectGenes(gbm_cds, min_expr = min_expr)
+  expressed_genes <- row.names(subset(fData(gbm_cds),num_cells_expressed >= num_cells_expressed)) #genes expressed in at least 10 cells of the data set.
+  
+  # Log-transform each value in the expression matrix.
+  L <- log(exprs(gbm_cds[expressed_genes,]))
+
+  # Standardize each gene, so that they are all on the same scale,
+  # Then melt the data with plyr so we can plot it easily
+  melted_dens_df <- reshape2::melt(Matrix::t(scale(Matrix::t(L))))
+
+  # Plot the distribution of the standardized gene expression values.
+  png(paste(outdir,"/standardized_distribution.png",sep =""))
+  print(qplot(value, geom = "density", data = melted_dens_df) +
+        stat_function(fun = dnorm, size = 0.5, color = 'red') +
+        xlab("Standardized log(UMIcounts)") +
+        ylab("Density"))
+  dev.off()
+  
+  ## Visualize percentage of mitochondiral genes
+  gbm_cds <- addPercentMitoch(gbm_cds)
+  
+  png(paste(outdir,"/percent_mito.png",sep =""))
+  plot(hist(gbm_cds$percentMito*100,breaks = 100),main = "percentage of mitochondrial transcripts")
+  abline(v=propMitochFilter*100)
+  dev.off()
+  
+  return(gbm_cds)
+
+}
+
+
diff --git a/R_src/do_consensus.R b/R_src/do_consensus.R
new file mode 100644
index 0000000000000000000000000000000000000000..9e50b986eddb951f96d941f855591655baabe0bf
--- /dev/null
+++ b/R_src/do_consensus.R
@@ -0,0 +1,26 @@
+do_consensus <- function(path){
+
+# Opening the predicted files
+predicted_files <- list.files(path)
+predicted_names <- strsplit(predicted_files, split = ".txt")
+predicted_list <- list()
+for(i in 1:length(predicted_files)){
+  name <- predicted_names[[i]]
+  print(name)
+  predicted_list[[name]] <- assign(predicted_names[[i]], read.table(paste0(path, predicted_files[i]), row.names = 1, header = T))
+}
+# adding all the columns to a single table 
+predicted_table <- do.call("cbind", predicted_list)
+# convert to dataframe
+predicted_data <- data.frame(predicted_table)
+# transpose
+predicted_data <- t(predicted_data)
+# creation "Consensus"
+predicted_consensus=data.frame(Consensus = 1:ncol(predicted_data))
+for (i in 1:ncol(predicted_data)){
+  table_compteur <- table(predicted_data[,i])
+  predicted_consensus[i,1] <- names(which.max(table_compteur))
+  rownames(predicted_consensus)[i] <- colnames(predicted_data)[i]
+}
+return(predicted_consensus)
+}
diff --git a/R_src/filterForDcaScenicCL.R b/R_src/filterForDcaScenicCL.R
new file mode 100644
index 0000000000000000000000000000000000000000..47b93ef58c87a29a433534890d91511a91036615
--- /dev/null
+++ b/R_src/filterForDcaScenicCL.R
@@ -0,0 +1,85 @@
+# Script to make analyzis of pseudotime with a gbm_cds ordered 
+# command line interface
+
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+
+
+suppressMessages(library(monocle))
+suppressMessages(library(getopt))
+suppressMessages(library(biomaRt))
+
+source("R_src/Enrichment.R")
+
+
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'inputData',  'i', 1, "character", "REQUIRED: matrix data (.tsv generated by the report) from 10X data ordered by monocle.",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)',
+  'firstFilter',   'f',1,"numeric", 'Keep only the genes with at least N = f(X) 2 mean imputed counts across all samples (X=2 by default) \n(e.g. the total number the gene would have, if it was expressed with a value of X in 1% of the cells). \nAdjust X value according to the dataset (it will depend on the dataset units, e.g. UMI, TPMs???).',
+  'secondFilter',  's',1, "numeric", 'Keep the genes that are detected in at least X% of the cells (X=1% by default)'
+), byrow=TRUE, ncol=5);
+
+opt = getopt(spec)
+
+
+
+# if help was asked, print a friendly message
+# and exit with a non-zero error code
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputData)) {
+  cat("Gene filtering of a gbm cds ordered. For scenic use, two filters")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+#set default arguments values
+
+if (is.null(opt$outdir)) {
+  opt$outdir = "./"
+}
+if (is.null(opt$firstFilter)) {
+  opt$firstFilter = 2
+}
+if (is.null(opt$secondFilter)) {
+  opt$secondFilter = 0.01
+}
+
+data <- read.table(opt$inputData,sep = "\t",header = T,row.names = 1)
+
+genes <- list()
+genes$names <- rownames(data)
+
+
+nCellsPerGene <- apply(data, 1, function(x) sum(x>0))
+nCountsPerGene <- apply(data, 1, sum)
+summary(nCellsPerGene)
+sum(data>0) / sum(data==0)
+
+#first filter
+minReads <- opt$firstFilter*.01*ncol(data)
+genesLeftMinReads <- names(nCountsPerGene)[which(nCountsPerGene > minReads)]
+length(genesLeftMinReads)
+
+#second filter
+minSamples <- ncol(data)*opt$secondFilter
+nCellsPerGene2 <- nCellsPerGene[genesLeftMinReads]
+genesLeftMinCells <- names(nCellsPerGene2)[which(nCellsPerGene2 > minSamples)]
+length(genesLeftMinCells)
+
+
+
+t_data <- t(data)
+
+print(head(genesLeftMinCells))
+
+write.table(t_data[,c(genesLeftMinCells)],file = paste(opt$outdir,"/expressionRawCountFilteredForScenic.tsv",sep=""),sep = '\t',quote = F)
+
+
+
+
+
+
diff --git a/R_src/filterForScenicCL.R b/R_src/filterForScenicCL.R
new file mode 100644
index 0000000000000000000000000000000000000000..b1175633ca29a5b0836a1783648623e92ca2a831
--- /dev/null
+++ b/R_src/filterForScenicCL.R
@@ -0,0 +1,120 @@
+# Script to make analyzis of pseudotime with a gbm_cds ordered 
+# command line interface
+
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+# http://stackoverflow.com/questions/4090169/elegant-way-to-check-for-missing-packages-and-install-them
+
+
+suppressMessages(library(monocle))
+suppressMessages(library(getopt))
+
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'inputRDS',  'i', 1, "character", "REQUIRED: 10X data ordered by monocle (.RDS generated by orderCL.R).",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)',
+  'firstFilter',   'f',1,"numeric", 'Keep only the genes with at least N = f(X) UMI counts across all samples (X=2 by default) \n(e.g. the total number the gene would have, if it was expressed with a value of X in 1% of the cells). \nAdjust X value according to the dataset (it will depend on the dataset units, e.g. UMI, TPMs???).',
+  'secondFilter',  's',1, "numeric", 'Keep the genes that are detected in at least X% of the cells (X=1% by default)',
+  'markerTable', 'm',1, "character", "Markers table for the identification of transcription factors",
+  'mouseTF','t', 1, 'character', 'Mouse transcription factors list to identify TF in markers table'
+), byrow=TRUE, ncol=5);
+
+opt = getopt(spec)
+
+
+
+# if help was asked, print a friendly message
+# and exit with a non-zero error code
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputRDS)) {
+  cat("Gene filtering of a gbm cds ordered. For scenic use, two filters")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+#set default arguments values
+
+if (is.null(opt$outdir)) {
+  opt$outdir = "./"
+}
+if (is.null(opt$firstFilter)) {
+  opt$firstFilter = 2
+}
+if (is.null(opt$secondFilter)) {
+  opt$secondFilter = 0.01
+}
+
+if(!is.null(opt$markerTable)) {
+  print("find TF in marker table...")
+  markerTable <- read.table(opt$markerTable,sep = "\t",header = T)
+  TFlist <- read.table(opt$mouseTF,header = F)
+  TF_markers <- data.frame(external_gene_name = markerTable$gene[which(markerTable$gene %in% TFlist$V1)])
+  
+  #Add tf from bonzanni plus Gfi1b
+  TF_bonzani <- data.frame(external_gene_name = c("Spi1","Tal1","Zfpm1","Cbfa2t3","Erg","Fli1","Gata1","Gata2","Hhex","Runx1","Smad6","Zbtb16","Gfi1b"))
+  TF_markers <- rbind(TF_markers,TF_bonzani)
+  TF_markers <- unique(TF_markers$external_gene_name)
+  
+  write.table(TF_markers,paste(opt$outdir,"/markers_and_bonzanni_TF.tsv",sep =""),
+              sep = "\t",quote = F,row.names = F, col.names = F)
+}
+
+gbm_cds <- readRDS(opt$inputRDS)
+
+# Convert to monocle object if necessary (filtering script initially made for monocle)
+
+if(is(gbm_cds) == "Seurat") {
+  #It is easier to pass by a monocle object and then reget the seurat object with the genes filtered
+  print("input seurat object converting it to monocle")
+  DefaultAssay(object = gbm_cds) <- "RNA"
+  
+  pd <- new("AnnotatedDataFrame", data = gbm_cds@meta.data)
+  fd <- new("AnnotatedDataFrame", data = data.frame(gene_short_name = rownames(gbm_cds))) 
+  rownames(fd) <- fd$gene_short_name
+  
+  
+  gbm_cds <- newCellDataSet(GetAssayData(gbm_cds,slot = "counts"),
+                            phenoData = pd,
+                            featureData = fd,
+                            lowerDetectionLimit = 0.1,
+                            expressionFamily = negbinomial.size())
+  
+  gbm_cds <- detectGenes(gbm_cds, min_expr = 0.1)
+  
+  
+  
+}
+
+exp <- as.matrix(exprs(gbm_cds))
+geneShortName<-make.unique(as.character(fData(gbm_cds)$gene_short_name)) # to see between make.unique, remove dup..
+rownames(exp) <- geneShortName
+
+nCellsPerGene <- apply(exp, 1, function(x) sum(x>0))
+nCountsPerGene <- apply(exp, 1, sum)
+summary(nCellsPerGene)
+sum(exp>0) / sum(exp==0)
+
+#first filter
+minReads <- opt$firstFilter*.01*ncol(exp)
+genesLeftMinReads <- names(nCountsPerGene)[which(nCountsPerGene > minReads)]
+length(genesLeftMinReads)
+
+#second filter
+minSamples <- ncol(exp)*opt$secondFilter
+nCellsPerGene2 <- nCellsPerGene[genesLeftMinReads]
+genesLeftMinCells <- names(nCellsPerGene2)[which(nCellsPerGene2 > minSamples)]
+length(genesLeftMinCells)
+
+t_exp <- t(exp)
+
+print(head(genesLeftMinCells))
+
+write.table(t_exp[,genesLeftMinCells],file = paste(opt$outdir,"/expressionRawCountFilteredForScenic.tsv",sep=""),sep = '\t',quote = F)
+
+
+
+
+
diff --git a/R_src/filterSeuratForScenicCL.R b/R_src/filterSeuratForScenicCL.R
new file mode 100644
index 0000000000000000000000000000000000000000..7042db13561308c400b3e349bd749955b61bb715
--- /dev/null
+++ b/R_src/filterSeuratForScenicCL.R
@@ -0,0 +1,88 @@
+# Script to make analyzis of pseudotime with a gbm_cds ordered 
+# command line interface
+
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+# http://stackoverflow.com/questions/4090169/elegant-way-to-check-for-missing-packages-and-install-them
+
+
+suppressMessages(library(Seurat))
+suppressMessages(library(getopt))
+
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'inputRDS',  'i', 1, "character", "REQUIRED: 10X data ordered by monocle (.RDS generated by orderCL.R).",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)',
+  'firstFilter',   'f',1,"numeric", 'Keep only the genes with at least N = f(X) UMI counts across all samples (X=2 by default) \n(e.g. the total number the gene would have, if it was expressed with a value of X in 1% of the cells). \nAdjust X value according to the dataset (it will depend on the dataset units, e.g. UMI, TPMs???).',
+  'secondFilter',  's',1, "numeric", 'Keep the genes that are detected in at least X% of the cells (X=1% by default)',
+  'mouseTF','t', 1, 'character', 'Mouse transcription factors list to identify TF in markers table'
+), byrow=TRUE, ncol=5);
+
+opt = getopt(spec)
+
+
+
+# if help was asked, print a friendly message
+# and exit with a non-zero error code
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputRDS)) {
+  cat("Gene filtering of a gbm cds ordered. For scenic use, two filters")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+#set default arguments values
+
+if (is.null(opt$outdir)) {
+  opt$outdir = "./"
+}
+if (is.null(opt$firstFilter)) {
+  opt$firstFilter = 2
+}
+if (is.null(opt$secondFilter)) {
+  opt$secondFilter = 0.01
+}
+
+seurat <- readRDS(opt$inputRDS)
+
+DefaultAssay(object = seurat) <- "RNA"
+
+exp <- as.matrix(GetAssayData(seurat,slot = "counts"))
+
+
+nCellsPerGene <- apply(exp, 1, function(x) sum(x>0))
+nCountsPerGene <- apply(exp, 1, sum)
+summary(nCellsPerGene)
+sum(exp>0) / sum(exp==0)
+
+#first filter
+minReads <- opt$firstFilter*.01*ncol(exp)
+genesLeftMinReads <- names(nCountsPerGene)[which(nCountsPerGene > minReads)]
+length(genesLeftMinReads)
+
+#second filter
+minSamples <- ncol(exp)*opt$secondFilter
+nCellsPerGene2 <- nCellsPerGene[genesLeftMinReads]
+genesLeftMinCells <- names(nCellsPerGene2)[which(nCellsPerGene2 > minSamples)]
+length(genesLeftMinCells)
+
+t_exp <- t(exp)
+
+print(head(genesLeftMinCells))
+
+
+
+
+t_exp <- t(exp)
+
+print(head(genesLeftMinCells))
+
+write.table(t_exp[,genesLeftMinCells],file = paste(opt$outdir,"/expressionRawCountFilteredForScenic.tsv",sep=""),sep = '\t',quote = F)
+
+
+
+
+
diff --git a/R_src/funForAnalysisSeurat.R b/R_src/funForAnalysisSeurat.R
new file mode 100755
index 0000000000000000000000000000000000000000..a24ae690504605424c3ceb5d797f8c69d46919d8
--- /dev/null
+++ b/R_src/funForAnalysisSeurat.R
@@ -0,0 +1,388 @@
+#Visualize position of a cluster on the UMAP
+clusterDistrib_seurat <- function(seurat = seurat, numclust = numclust, metaData = "numclust", flip = FALSE){
+  print(numclust)
+  seurat@meta.data$clusterDis <- FALSE
+  seurat@meta.data[which(seurat@meta.data[,metaData] == numclust),"clusterDis"] <- TRUE
+  print(nrow(seurat@meta.data[which(seurat@meta.data$clusterDis == TRUE),]))
+  seurat@meta.data$clusterDis <- factor(seurat@meta.data$clusterDis, levels = c(TRUE, FALSE))
+  if(flip == TRUE){
+    plot <- DimPlot(seurat, group.by = "clusterDis", pt.size = 0.01) + labs(title = numclust) + NoLegend() +
+      theme(axis.title = element_text(size = 6), axis.text = element_text(size = 6), title = element_text(size = 7)) +
+      scale_colour_manual(values = c("red", "grey")) +
+      scale_alpha_manual(values = c(1,0)) + coord_flip() + scale_y_reverse()
+  }else{
+    plot <- DimPlot(seurat, group.by = "clusterDis", pt.size = 0.01) + labs(title = numclust) + NoLegend() +
+      theme(axis.title = element_text(size = 6), axis.text = element_text(size = 6), title = element_text(size = 7)) +
+      scale_colour_manual(values = c("red", "grey")) +
+      scale_alpha_manual(values = c(1,0))
+  }
+  return(plot)
+}
+
+clusterDistrib_seurat_cond <- function(seurat = seurat, numclust = numclust, metaData = "numclust", flip = FALSE, color){
+  print(numclust)
+  seurat@meta.data$clusterDis <- "Other"
+  seurat@meta.data[which(seurat@meta.data[,metaData] == numclust),"clusterDis"] <- numclust
+  seurat@meta.data$clusterCondDis <- paste(seurat$clusterDis, seurat$condition, sep = "_")
+  # colorClusterCond <- c(colorGenotype, alpha(colorGenotype, 0.4), rep("grey", 4))
+  colorClusterCond <- c(color, rep("grey", 4))
+  # print(nrow(seurat@meta.data[which(seurat@meta.data$clusterDis == TRUE),]))
+  seurat@meta.data$clusterCondDis <- factor(seurat@meta.data$clusterCondDis, 
+                                            levels = c(paste(numclust, "Young_Wild", sep = "_"), paste(numclust, "Young_Mutant", sep = "_"), 
+                                                       paste(numclust, "Aged_Wild", sep = "_"), paste(numclust, "Aged_Mutant", sep = "_"), 
+                                                       "Other_Young_Wild", "Other_Young_Mutant", "Other_Aged_Wild", "Other_Aged_Mutant"))
+  names(colorClusterCond) <- levels(seurat$clusterCondDis)
+  if(flip == TRUE){
+    plot <- DimPlot(seurat, group.by = "clusterCondDis", pt.size = 0.1) + labs(title = numclust) +
+      theme(axis.title = element_text(size = 6), axis.text = element_text(size = 6), title = element_text(size = 7)) +
+      scale_colour_manual(values = colorClusterCond) +
+      coord_flip() + scale_y_reverse()
+  }else{
+    plot <- DimPlot(seurat, group.by = "clusterCondDis", pt.size = 0.1) + labs(title = numclust) +
+      theme(axis.title = element_text(size = 6), axis.text = element_text(size = 6), title = element_text(size = 7)) +
+      scale_colour_manual(values = colorClusterCond) 
+  }
+  return(plot)
+}
+
+#Barplot to show distribution of samples along the cluster
+getSamplePropPerClustBarplot <- function(hspc.combined) {
+  clustersampleName <- ddply(hspc.combined@meta.data,~FinalCluster + sampleName,nrow)
+  nCellSample <- ddply(hspc.combined@meta.data,~sampleName, nrow)
+  names(nCellSample)[2] <- "TotalCell"
+  test <- join(x = clustersampleName, y = nCellSample, by = "sampleName")
+  
+  test$propSample <- test$V1/test$TotalCell
+  
+  propExpect <- table(hspc.combined@meta.data$sampleName)/length(hspc.combined@meta.data$sampleName)[]
+  propYoungExp <- propExpect[[unique(hspc.combined@meta.data$sampleName)[1]]]
+  
+  sampleName <- ggplot(data.frame(test), aes(fill = sampleName,y = propSample, x=FinalCluster)) +
+    geom_bar( stat="identity", position="fill")+
+    scale_fill_manual( values= rev(hue_pal()(length(unique(hspc.combined@meta.data$sampleName)))))+
+    scale_y_continuous(name = "Sample (%)", labels = c(0,25,50,75,100))+
+    ylab(label = "")+xlab(label = "") + coord_flip()+
+    theme(legend.title=element_blank(), axis.text.y = element_text(size = 16)) 
+  return(sampleName)
+}
+
+getGenotypePropPerClustBarplot <- function(hspc.combined) {
+  clusterGenotype <- ddply(hspc.combined@meta.data,~FinalCluster + Genotype,nrow)
+  
+  propExpect <- table(hspc.combined@meta.data$Genotype)/length(hspc.combined@meta.data$Genotype)[]
+  # propWildExp <- propExpect[[unique(hspc.combined@meta.data$Genotype)[2]]]
+  propWildExp <- propExpect["Wild"][[1]]
+  #clusterAGE$numclust <- factor(v$clusterNature , levels = c(""))
+  clusterGenotype$Genotype <- factor(clusterGenotype$Genotype , levels = c("Mutant","Wild")) 
+  
+  
+  Genotype <- ggplot(data.frame(clusterGenotype), aes(fill = Genotype,y = V1, x=FinalCluster)) +
+    geom_bar( stat="identity", position="fill")+
+    scale_fill_manual( values= rev(hue_pal()(length(unique(hspc.combined@meta.data$Genotype)))))+
+    scale_y_continuous(name = "Genotype (%)", labels = c(0,25,50,75,100))+
+    ylab(label = "")+xlab(label = "") + coord_flip() + geom_hline(yintercept = propWildExp)+
+    theme(legend.title=element_blank(), axis.text.y = element_text(size = 16))
+  return(Genotype)
+  
+}
+
+getAGEPropPerClustBarplot_2 <- function(hspc.combined) {
+  clusterAge <- ddply(hspc.combined@meta.data,~FinalCluster + AGE,nrow)
+  
+  propExpect <- table(hspc.combined@meta.data$AGE)/length(hspc.combined@meta.data$AGE)[]
+  propYoungExp <- propExpect[[unique(hspc.combined@meta.data$AGE)[1]]]
+  
+  #clusterAGE$numclust <- factor(v$clusterNature , levels = c(""))
+  clusterAge$AGE <- factor(clusterAge$AGE , levels = c("Aged","Young"))
+  
+  
+  AGE <- ggplot(data.frame(clusterAge), aes(fill = AGE,y = V1, x=FinalCluster,levels = "Young","Aged")) +
+    geom_bar( stat="identity", position="fill")+
+    scale_fill_manual( values= rev(hue_pal()(length(unique(hspc.combined@meta.data$AGE)))))+
+    scale_y_continuous(name = "Age (%)", labels = c(0,25,50,75,100))+
+    ylab(label = "")+xlab(label = "") + coord_flip() + geom_hline(yintercept = 1-propYoungExp)+
+    theme(legend.title=element_blank(), axis.text.y = element_text(size = 16))
+  return(AGE)
+  
+}
+
+#Bind sampleName and barcode (needed to annotate CR2 data)
+tagCell <- function(cds){
+  pData(cds)$taggedBarcode <- paste(pData(cds)$sampleName, pData(cds)$barcode, sep = "_")
+  return(cds)
+}
+
+#Add information if the cell is found with CellRanger2
+getCR2diff <- function(seurat = seurat, allCellCR2){
+  uniqueCell <- setdiff(rownames(seurat@meta.data), allCellCR2)
+  
+  seurat@meta.data$CellRanger2 <- "Both"
+  seurat@meta.data[uniqueCell,"CellRanger2"] <- "CellRanger3"
+  seurat@meta.data[which(is.element(seurat@meta.data$sampleName, set = c("young_Zbtb_B", "old_Zbtb_B"))),"CellRanger2"] <- "NewData"
+  
+  clusterCellRanger2 <- ddply(seurat@meta.data,~FinalCluster + CellRanger2,nrow)
+  
+  originName <- ggplot(data.frame(clusterCellRanger2), aes(fill = CellRanger2,y = V1, x=FinalCluster)) +
+    geom_bar( stat="identity", position="fill")+
+    scale_fill_manual( values= rev(hue_pal()(length(unique(seurat@meta.data$CellRanger2)))))+
+    scale_y_continuous(name = "Sample (%)", labels = c(0,25,50,75,100))+
+    ylab(label = "")+xlab(label = "") + coord_flip()+
+    theme(legend.title=element_blank()) 
+  return(originName)
+}
+
+#Hypergeometric test and plotting the result with a bp function, use a funtion from funForSeurat.R
+getEnrichPopClust <- function(hspc.combined, Xname, Yname, colorX, colorY, metaCol = "AGE"){
+  conditionEnrich <- getEnrichAge(hspc.combined = hspc.combined,clustCol = "numclust", metaCol = metaCol)
+  conditionEnrich <- as.data.frame(t(conditionEnrich))
+  conditionEnrich$color <- "black"
+  conditionEnrich[which(as.numeric(as.vector(conditionEnrich$phyper)) < 0.05 & conditionEnrich$enrich == Xname),"color"] <- colorX
+  conditionEnrich[which(as.numeric(as.vector(conditionEnrich$phyper)) < 0.05 & conditionEnrich$enrich == Yname),"color"] <- colorY
+  return(conditionEnrich)
+}
+
+# Allow you to find the markers between 2 conditions for each 
+MarkersAnalysis <- function(seurat = seurat, analysis){
+  seurat@meta.data$Analysis <- paste(seurat@meta.data[,analysis], seurat@meta.data$FinalCluster, sep = "_")
+  condAnalysis <- unique(seurat@meta.data[,analysis])
+  
+  Idents(seurat) <- "Analysis"
+  
+  allMarkers <- data.frame()
+  for(clustName in unique(seurat@meta.data$FinalCluster)){
+    print(clustName)
+    markersClust <- FindMarkers(object = seurat, ident.1 = paste(condAnalysis[1], clustName, sep = "_"), 
+                                ident.2 = paste(condAnalysis[2], clustName, sep = "_"), logfc.threshold = 0.25)
+    
+    markersClust[,paste(condAnalysis[1], condAnalysis[2], sep = "_vs_")] <- paste(condAnalysis[1], clustName, "up", sep ="_")
+    markersClust[which(markersClust$avg_logFC < 0),paste(condAnalysis[1], condAnalysis[2], sep = "_vs_")] <- paste(condAnalysis[1], clustName, "down",sep ="_")
+    markersClust$Gene <- rownames(markersClust)
+    markersClust <- markersClust[order(markersClust$avg_logFC),]
+    markersClust <- markersClust[which(markersClust$p_val_adj < 0.05),]
+    allMarkers <- rbind(allMarkers, markersClust)
+  }
+  return(allMarkers)
+}
+
+getPredictedPerSample <- function(seurat){
+  propPredicted <- ddply(seurat@meta.data,~sampleName + predicted, nrow)
+  predicted <- ggplot(data.frame(propPredicted), aes(fill = predicted,y = V1, x=sampleName)) +
+    geom_bar( stat="identity", position="fill")+
+    scale_fill_manual( values= hue_pal()(length(unique(seurat@meta.data$predicted))))+
+    scale_y_continuous(name = "Cell type (%)", labels = c(0,25,50,75,100))+
+    ylab(label = "")+xlab(label = "") +
+    theme(legend.title=element_blank(), axis.text.x = element_text(angle = 75, hjust = 1, size = 16), axis.text.y = element_text(size = 12))
+  
+  return(predicted)
+}
+
+getCellTypePerSample <- function(seurat){
+  propPredicted <- ddply(seurat@meta.data,~sampleName + cellType_Actinn, nrow)
+  predicted <- ggplot(data.frame(propPredicted), aes(fill = cellType_Actinn,y = V1, x=sampleName)) +
+    geom_bar( stat="identity", position="fill")+
+    scale_fill_manual( values= hue_pal()(length(unique(seurat@meta.data$cellType_Actinn))))+
+    scale_y_continuous(name = "Cell type (%)", labels = c(0,25,50,75,100))+
+    ylab(label = "")+xlab(label = "") +
+    theme(legend.title=element_blank(), axis.text.x = element_text(angle = 75, hjust = 1, size = 16), axis.text.y = element_text(size = 12))
+  
+  return(predicted)
+}
+
+getPhasesPerSample <- function(seurat){
+  propPhases <- ddply(seurat@meta.data,~sampleName + phases, nrow)
+  phases <- ggplot(data.frame(propPhases), aes(fill = phases,y = V1, x=sampleName)) +
+    geom_bar( stat="identity", position="fill")+
+    scale_fill_manual( values= hue_pal()(length(unique(seurat@meta.data$phases))))+
+    scale_y_continuous(name = "Cell type (%)", labels = c(0,25,50,75,100))+
+    ylab(label = "")+xlab(label = "") +
+    theme(legend.title=element_blank(), axis.text.x = element_text(angle = 75, hjust = 1, size = 16),  axis.text.y = element_text(size = 12))
+  
+  return(phases)
+}
+
+# getPhasePropPerClustBarplotSplit <- function(hspc.combined, color = hue_pal()(length(unique(hspc.combined@meta.data$phases)))) {
+#   clusterpredicted <- ddply(hspc.combined@meta.data,~numclust + phases + condition,nrow)
+#   
+#   split_plot <- list()
+#   for(i in unique(clusterpredicted$condition)){
+#     print(i)
+#     predicted <- ggplot(data.frame(clusterpredicted[which(clusterpredicted$condition == i),]), aes(fill = phases,y = V1, x=numclust)) +
+#       geom_bar( stat="identity", position="fill")+
+#       scale_fill_manual( values= color)+
+#       scale_y_continuous(name = "Cell type (%)", labels = c(0,25,50,75,100))+
+#       ylab(label = "")+xlab(label = "") + coord_flip()+
+#       labs(title = i) + theme(plot.title = element_text(size = 10), axis.text.x = element_text(size = 12))
+#     split_plot[[i]] <- predicted
+#   }
+#   return(split_plot)
+# }
+getPhasePropPerClustBarplotSplit <- function(hspc.combined, color = hue_pal()(length(unique(hspc.combined@meta.data$phases))),
+                                                  condition = "Young_Wild", sig_val = "black") {
+  clusterpredicted <- ddply(hspc.combined@meta.data,~FinalCluster + phases + condition,nrow)
+  
+  split_plot <- list()
+  for(i in condition){
+    print(i)
+    predicted <- ggplot(data.frame(clusterpredicted[which(clusterpredicted$condition == i),]), aes(fill = phases,y = V1, x=FinalCluster)) +
+      geom_bar( stat="identity", position="fill")+
+      scale_fill_manual( values= color)+
+      scale_y_continuous(name = "Cell type (%)", labels = c(0,25,50,75,100))+
+      ylab(label = "")+xlab(label = "") + coord_flip()+ 
+      labs(title = i) + theme(plot.title = element_text(size = 10), axis.text.x = element_text(size = 12), 
+                              axis.text.y = element_text(colour = sig_val, size = 16))
+    split_plot[[i]] <- predicted
+  }
+  return(split_plot)
+}
+
+Fisher_for_cellCycle <- function(seurat, condition){
+  summary_data <- ddply(seurat@meta.data,~condition + FinalCluster + phases, nrow)
+  
+  results_list <- list()
+  for(i in levels(seurat$FinalCluster)){
+    print(i)
+    summ_clust <- summary_data[which(summary_data$FinalCluster == i & summary_data$condition %in% condition),]
+    contingence <- data.frame(dcast(summ_clust, phases ~ condition, fill = 0, value.var = "V1"), row.names = 1)
+    if(nrow(contingence)>1){
+      fisher <- fisher.test(contingence)
+      results_list[i] <- fisher$p.value
+    } else{
+      results_list[i] <- 1
+    }
+  }
+  result <- as.data.frame(do.call("rbind",results_list))
+  result$color <- "black"
+  result[which(result$V1 < 0.05),"color"] <- "#DD3497"
+  
+  return(result)
+}
+
+Fisher_for_cellType <- function(seurat, condition){
+  summary_data <- ddply(seurat@meta.data,~condition + FinalCluster + cellType_Actinn, nrow)
+  
+  results_list <- list()
+  for(i in levels(seurat$FinalCluster)){
+    print(i)
+    summ_clust <- summary_data[which(summary_data$FinalCluster == i & summary_data$condition %in% condition),]
+    contingence <- data.frame(dcast(summ_clust, cellType_Actinn ~ condition, fill = 0, value.var = "V1"), row.names = 1)
+    if(nrow(contingence)>1){
+      fisher <- fisher.test(contingence)
+      results_list[i] <- fisher$p.value
+    } else{
+      results_list[i] <- 1
+    }
+  }
+  result <- as.data.frame(do.call("rbind",results_list))
+  result$color <- "black"
+  result[which(result$V1 < 0.05),"color"] <- "red"
+  
+  return(result)
+}
+
+Fisher_for_predicted <- function(seurat, condition){
+  summary_data <- ddply(seurat@meta.data,~condition + FinalCluster + predicted, nrow)
+  
+  results_list <- list()
+  for(i in levels(seurat$FinalCluster)){
+    print(i)
+    summ_clust <- summary_data[which(summary_data$FinalCluster == i & summary_data$condition %in% condition),]
+    contingence <- data.frame(dcast(summ_clust, predicted ~ condition, fill = 0, value.var = "V1"), row.names = 1)
+    if(nrow(contingence)>1){
+      fisher <- fisher.test(contingence)
+      results_list[i] <- fisher$p.value
+    } else{
+      results_list[i] <- 1
+    }
+  }
+  result <- as.data.frame(do.call("rbind",results_list))
+  result$color <- "black"
+  result[which(result$V1 < 0.05),"color"] <- "red"
+  
+  return(result)
+}
+
+
+getPredictedPropPerClustBarplotSplit <- function(hspc.combined, colors = hue_pal()(length(unique(hspc.combined@meta.data$predicted))), 
+                                                 condition = "Young_Wild", sig_val = "black") {
+  clusterpredicted <- ddply(hspc.combined@meta.data,~FinalCluster + predicted + condition,nrow)
+  
+  split_plot <- list()
+  for(i in condition){
+    print(i)
+    predicted <- ggplot(data.frame(clusterpredicted[which(clusterpredicted$condition == i),]), aes(fill = predicted,y = V1, x=FinalCluster)) +
+      geom_bar( stat="identity", position="fill")+
+      scale_fill_manual( values= colors)+
+      scale_y_continuous(name = "Cell type (%)", labels = c(0,25,50,75,100))+
+      ylab(label = "")+xlab(label = "") + coord_flip()+
+      labs(title = i) + theme(plot.title = element_text(size = 10), axis.text.x = element_text(size = 12), 
+                              axis.text.y = element_text(colour = sig_val, size = 16))
+    split_plot[[i]] <- predicted
+  }
+  return(split_plot)
+}
+
+getCellTypePropPerClustBarplotSplit <- function(hspc.combined, colors = hue_pal()(length(unique(hspc.combined@meta.data$predicted))), 
+                                                 condition = "Young_Wild", sig_val = "black") {
+  clusterpredicted <- ddply(hspc.combined@meta.data,~FinalCluster + cellType_Actinn + condition,nrow)
+  
+  split_plot <- list()
+  for(i in condition){
+    print(i)
+    cellType <- ggplot(data.frame(clusterpredicted[which(clusterpredicted$condition == i),]), aes(fill = cellType_Actinn,y = V1, x=FinalCluster)) +
+      geom_bar( stat="identity", position="fill")+
+      scale_fill_manual( values= colors)+
+      scale_y_continuous(name = "Cell type (%)", labels = c(0,25,50,75,100))+
+      ylab(label = "")+xlab(label = "") + coord_flip()+
+      labs(title = i) + theme(plot.title = element_text(size = 10),  
+                              axis.text.y = element_text(colour = sig_val, size = 16))
+    split_plot[[i]] <- cellType
+  }
+  return(split_plot)
+}
+
+
+getPhasePropPerPredictedBarplotSplit <- function(hspc.combined, colors = hue_pal()(length(unique(hspc.combined@meta.data$phases))), 
+                                                 condition = "Young_Wild") {
+  clusterpredicted <- ddply(hspc.combined@meta.data,~phases + predicted + condition,nrow)
+  
+  split_plot <- list()
+  for(i in condition){
+    print(i)
+    predicted <- ggplot(data.frame(clusterpredicted[which(clusterpredicted$condition == i),]), aes(fill = phases,y = V1, x=predicted)) +
+      geom_bar( stat="identity", position="fill")+
+      scale_fill_manual( values= colors)+
+      scale_y_continuous(name = "Cell type (%)", labels = c(0,25,50,75,100))+
+      ylab(label = "")+xlab(label = "") + 
+      labs(title = i) + theme(plot.title = element_text(size = 10))
+    split_plot[[i]] <- predicted
+  }
+  return(split_plot)
+}
+
+# getCellTypePropPerClustBarplotSplit <- function(hspc.combined, colors = hue_pal()(length(unique(hspc.combined@meta.data$cellType_Actinn)))) {
+#   clusterpredicted <- ddply(hspc.combined@meta.data,~FinalCluster + cellType_Actinn + condition,nrow)
+#   
+#   split_plot <- list()
+#   for(i in unique(clusterpredicted$condition)){
+#     print(i)
+#     predicted <- ggplot(data.frame(clusterpredicted[which(clusterpredicted$condition == i),]), aes(fill = cellType_Actinn,y = V1, x=FinalCluster)) +
+#       geom_bar( stat="identity", position="fill")+
+#       scale_fill_manual( values= colors)+
+#       scale_y_continuous(name = "Cell type (%)", labels = c(0,25,50,75,100))+
+#       ylab(label = "")+xlab(label = "") + coord_flip()+
+#       labs(title = i) + theme(plot.title = element_text(size = 10))
+#     split_plot[[i]] <- predicted
+#   }
+#   return(split_plot)
+# }
+
+getCellTypePerCondition <- function(seurat, colors = hue_pal()(length(unique(seurat$cellType_Actinn)))){
+  cellType <- ddply(seurat@meta.data,~condition + cellType_Actinn, nrow)
+  barplot <- ggplot(data = data.frame(cellType), aes(fill=cellType_Actinn, y=V1, x=condition)) +
+    geom_bar(stat = "identity", position = "fill") +
+    scale_fill_manual(values = colors) + 
+    scale_y_continuous(name = "Cell type (%)", labels = c(0,25,50,75,100)) +
+    ggtitle("Actinn") + theme(axis.text.x = element_text(size = 16, angle = -15), axis.title.x = element_blank(), plot.title = element_text(size = 25))
+  
+  return(barplot)
+}
diff --git a/R_src/funForLoading.R b/R_src/funForLoading.R
new file mode 100644
index 0000000000000000000000000000000000000000..839f78a4fcd58ee474173055824cc978bc054925
--- /dev/null
+++ b/R_src/funForLoading.R
@@ -0,0 +1,55 @@
+loadCellRangerMatrix <- function(matrix_dir,sample_name = NULL) {
+  if (!is.null(sample_name)) {
+    sample_name <- paste0(sample_name,"_")
+  }
+  barcode.path <- paste0(matrix_dir,"/",sample_name,"barcodes.tsv")
+  features.path <- paste0(matrix_dir,"/",sample_name,"genes.tsv")
+  matrix.path <- paste0(matrix_dir,"/",sample_name,"matrix.mtx")
+  mat <- readMM(file = matrix.path)
+  feature.names = read.delim(features.path, 
+                             header = FALSE,
+                             stringsAsFactors = FALSE)
+  barcode.names = read.delim(barcode.path, 
+                             header = FALSE,
+                             stringsAsFactors = FALSE)
+  colnames(mat) = barcode.names$V1
+  rownames(mat) = feature.names$V1
+  colnames(feature.names) <- c("id", "symbol")
+  rownames(feature.names) <- feature.names$id
+  colnames(barcode.names) <- c("barcode")
+  rownames(barcode.names) <- barcode.names$barcode
+  results <- list()
+  results$exprs <- mat
+  results$fd <- feature.names
+  results$pd <- barcode.names
+  
+  return(results)
+}
+
+loadCellRangerMatrix_cellranger3 <- function(matrix_dir,sample_name =NULL) {
+  if (!is.null(sample_name)) {
+    sample_name <- paste0(sample_name,"_")
+  }
+  barcode.path <- paste0(matrix_dir,"/",sample_name,"barcodes.tsv")
+  features.path <- paste0(matrix_dir,"/",sample_name,"features.tsv")
+  matrix.path <- paste0(matrix_dir,"/",sample_name,"matrix.mtx")
+  mat <- readMM(file = matrix.path)
+  feature.names = read.delim(features.path,
+                             header = FALSE,
+                             stringsAsFactors = FALSE)
+  barcode.names = read.delim(barcode.path,
+                             header = FALSE,
+                             stringsAsFactors = FALSE)
+  colnames(mat) = barcode.names$V1
+  rownames(mat) = feature.names$V1
+  colnames(feature.names) <- c("id", "symbol")
+  rownames(feature.names) <- feature.names$id
+  colnames(barcode.names) <- c("barcode")
+  rownames(barcode.names) <- barcode.names$barcode
+  results <- list()
+  results$exprs <- mat
+  results$fd <- feature.names
+  results$pd <- barcode.names
+
+  return(results)
+}
diff --git a/R_src/funForPlot.R b/R_src/funForPlot.R
new file mode 100644
index 0000000000000000000000000000000000000000..ae6058bffef12cc436c5980d8c8a2e31d64a37a6
--- /dev/null
+++ b/R_src/funForPlot.R
@@ -0,0 +1,38 @@
+g_legend<-function(a.gplot,direction = "horizontal"){
+  a.gplot <- a.gplot +theme(legend.direction=direction)
+  tmp <- ggplot_gtable(ggplot_build(a.gplot))
+  leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
+  legend <- tmp$grobs[[leg]] 
+  return(legend)}
+
+
+plotScore <- function(monocle,
+                      traj,
+                      sig,
+                      shortName,
+                      se = T,
+                      ptBranching1 = NULL,
+                      ptBranching2 = NULL) {
+  
+  
+  plot <- ggplot(pData(monocle)[which(pData(monocle)$State %in% traj),] ,aes(color = AGE,y = get(sig), x=Pseudotime)) + 
+    geom_smooth(se = se)  + 
+    ylab("Score")+
+    scale_color_manual(values = hue_pal()(2)[c(1,2)]) +
+    ggtitle(shortName) + 
+    theme(legend.text  = element_text(size = 18),
+          axis.text = element_text(size = 16),
+          axis.title = element_text(size = 18),
+          plot.title = element_text(size = 20))+
+    guides(colour = guide_legend(override.aes = list(size=8)))
+  
+  if (!is.null(ptBranching1)) {
+    plot <- plot + geom_vline(aes(xintercept=ptBranching1), color="red", linetype="dashed", size=1)
+  }
+  
+  if (!is.null(ptBranching2)) {
+    plot <- plot + geom_vline(aes(xintercept=ptBranching2), color="black", linetype="dashed", size=1)
+  }
+  
+  return(plot)
+}
diff --git a/R_src/funForSeurat.R b/R_src/funForSeurat.R
new file mode 100644
index 0000000000000000000000000000000000000000..970b280e64021ce828326e99d88d189cc8976bb0
--- /dev/null
+++ b/R_src/funForSeurat.R
@@ -0,0 +1,509 @@
+firstup <- function(x) {
+  substr(x, 1, 1) <- toupper(substr(x, 1, 1))
+  x
+}
+
+
+# DEPRECATED Seurat2
+# getClustInfo <- function(clust,signatures,testHyperSig,seurat,groupingName =NULL,markers) {
+#   clustInfo <- list()
+#   clustInfo$num_cells <- dim(seurat@meta.data[which(seurat@ident==clust),])[1]
+#   clustInfo$percent_cells <- clustInfo$num_cells/dim(seurat@meta.data)[1]
+#   
+#   # if(!is.null(groupingName)) {
+#   #   for (sample_name in unique(seurat@meta.data$sampleName)) {
+#   #     clustInfo[[paste(sample_name,"percentCells",sep ="")]] <- dim(seurat@meta.data[which(seurat@ident==clust & seurat@meta.data$sampleName==sample_name),])[1]/clustInfo$num_cells
+#   #   }
+#   # }
+#     
+#     percentPhases <- table(seurat@meta.data[which(seurat@ident==clust),"phases"])/length(seurat@meta.data[which(seurat@ident==clust),"phases"]) #In fact this is fraction not percentage
+#   
+#       
+#     # clustInfo$percent_G1_G0 <-percentPhases["G1_G0"]
+#     # clustInfo$percent_S <- percentPhases["S"]
+#     # clustInfo$percent_G2_M <- percentPhases["G2_M"]
+#     
+#     for (p in c("G1_G0","S","G2_M")) {
+#       if (is.element(p,names(percentPhases))) {
+#         clustInfo[[p]] <- percentPhases[p] 
+#       } else {
+#         clustInfo[[p]] <- 0
+#       }
+#     }
+#     
+#     # cell types
+#     percentCellType <- table(seurat@meta.data[which(seurat@ident==clust),"CellType"])/length(seurat@meta.data[which(seurat@ident==clust),"CellType"])
+#     
+#     for (t in unique(seurat@meta.data$CellType)) {
+#       if (is.element(t,names(percentCellType))) {
+#         clustInfo[[t]] <- percentCellType[t] 
+#       } else {
+#         clustInfo[[t]] <- 0
+#       }
+#     }
+#     
+#     if(!is.null(seurat@meta.data$age)) {
+#       
+#       # cell types
+#     percentAge <- table(seurat@meta.data[which(seurat@ident==clust),"age"])/length(seurat@meta.data[which(seurat@ident==clust),"age"])
+#     
+#     EnrichAge <- (table(seurat@meta.data[which(seurat@ident==clust),"age"]) / ((table(seurat@meta.data[,"age"])/sum(table(seurat@meta.data[,"age"])))*clustInfo$num_cells)) - 1
+#     
+#     for (a in unique(seurat@meta.data$age)) {
+#       if (is.element(a,names(percentAge))) {
+#         clustInfo[[a]] <- percentAge[a] 
+#         clustInfo[[paste("enrichAge_",a,sep ="")]] <- EnrichAge[a]
+#       } else {
+#         clustInfo[[a]] <- 0
+#       }
+#     }
+#       
+#     }
+#     
+#     if(!is.null(seurat@meta.data$percentCellTypeAge)) {
+#       
+#       # cell types by age
+#       percentCellTypeAge <- table(seurat@meta.data[which(seurat@ident==clust),"CellTypeAge"])/length(seurat@meta.data[which(seurat@ident==clust),"CellTypeAge"])
+#       
+#       for (at in unique(seurat@meta.data$CellTypeAge)) {
+#         if (is.element(a,names(percentCellTypeAge))) {
+#           clustInfo[[at]] <- percentCellTypeAge[at] 
+#         } else {
+#           clustInfo[[at]] <- 0
+#         }
+#       }
+#       
+#     }
+#     
+#     
+#     
+#     clustInfo$median_genes_expressed <- median(seurat@meta.data[which(seurat@ident==clust),"numGenesPerCells"])
+#     clustInfo$median_nUMI <- median(seurat@meta.data[which(seurat@ident==clust),"Total_mRNAs"])
+#     clustInfo$median_percentMitochGenes <- median(seurat@meta.data[which(seurat@ident==clust),"percentMito"])
+#     
+#     clustSig <- lapply(signatures,testHyperSig,seurat,clust,markers = markers) #forgot markers arg
+#     
+#     clustInfo <- c(clustInfo,clustSig)
+#     
+#     return(clustInfo)
+#     
+# }
+
+
+FindAgingMarkers3 <- function(cluster,hspc.combined) {
+  hspc.combined$cluster.AGE <- paste(Idents(object = hspc.combined), hspc.combined$AGE, sep = "_")
+  Idents(object = hspc.combined) <- "cluster.AGE"
+  agingMarkers <- FindMarkers(object = hspc.combined, 
+                              ident.1 = paste(cluster,"_Old",sep = ""), 
+                              ident.2 = paste(cluster,"_Young",sep = ""), 
+                              verbose = FALSE)
+  agingMarkers$Cluster <- paste(cluster,"_Old_up",sep ="")
+  agingMarkers$Cluster[which(agingMarkers$avg_logFC < 0)] <- paste(cluster,"_Old_down",sep ="")
+  agingMarkers$Gene <- rownames(agingMarkers)
+  agingMarkers <- agingMarkers[order(agingMarkers$avg_logFC),]
+  agingMarkers <- agingMarkers[which(agingMarkers$p_val_adj < 0.05),]
+  return(agingMarkers)
+}
+
+# DEPRECATED Seurat2
+# FindAgingMarkers <- function(cluster,seurat.combined,outdir = "./",logfc.threshold = 0.25) {
+#   age <- unique(seurat.combined@meta.data$age)
+#   ident.1 <- paste0(cluster,"_age_",age[1])
+#   ident.2 <- paste0(cluster,"_age_",age[2])
+#   age_effect <- FindMarkers(seurat.combined, ident.1 = ident.1, ident.2 = ident.2, 
+#                             print.bar = FALSE,logfc.threshold = logfc.threshold)
+#   dir.create(paste(outdir,"/aging_test_on_cluster_",cluster,sep =""),showWarnings = F)
+#   
+#   print(cluster)
+#   
+#   age_effect <- age_effect[which(age_effect$p_val_adj < 0.05),]
+#   
+#   if(dim(age_effect)[1]!=0) {
+#     age_effect <- age_effect[order(age_effect$avg_logFC),]
+#     age_effect$Gene <- rownames(age_effect)
+#     age_effect$Cluster <- NA
+#     age_effect[which(age_effect$avg_logFC > 0),"Cluster"] <- "Old_down"
+#     age_effect[which(age_effect$avg_logFC < 0),"Cluster"] <- "Old_up"
+#     age_effect$Cluster <- paste(age_effect$Cluster,"_cluster_",cluster,sep="")
+#     
+#     resDir <- paste(outdir,"/aging_test_on_cluster_",cluster,sep ="")
+#     
+#     gprofileClustResult <- gProfileAnalysis(deg_clust = age_effect,
+#                                             outdir = paste(resDir,"/gProfiler", sep =""),
+#                                             background = row.names(seurat.combined@data))
+#     
+#     # with a specific bg
+#     
+# 
+#     colClustRes <- "numclust"
+#     cellInClust <- row.names(seurat.combined@meta.data[which(seurat.combined@meta.data[,colClustRes]==cluster),])
+# 
+#     
+#     subSeurat <- SubsetData(seurat.combined,cells.use = cellInClust)
+#     print(is(subSeurat))
+#     
+#     #get expressed genes in this cluster
+#     num.cells <- rowSums(as.matrix(subSeurat@data) > 0)
+#     genes.use <- names(num.cells[which(num.cells >= 1)])
+#     
+#     gprofileClustResult <- gProfileAnalysis(deg_clust = age_effect,
+#                                             outdir = paste(resDir,"/gProfilerSpecificBg", sep =""),
+#                                             background = genes.use)
+#   }
+#   
+#   #############################################################################################################
+#   write.table(age_effect,paste(outdir,"/aging_test_on_cluster_",cluster,"/AgingMarkers_of_clust",cluster,".tsv", sep =""),sep = "\t",quote = F,col.names = NA)
+#   return(age_effect)
+# }
+
+
+# DEPRECATED Seurat2
+# removeNonExpressedGenes <- function(seurat,minPropCellExp) {
+#   pd <- new("AnnotatedDataFrame", data = seurat@meta.data)
+#   fd <- new("AnnotatedDataFrame", data = data.frame(gene_short_name = rownames(seurat@data)))
+#   rownames(fd) <- fd$gene_short_name
+#   
+#   gbm_cds <- newCellDataSet(seurat@raw.data,
+#                             phenoData = pd,
+#                             featureData = fd,
+#                             lowerDetectionLimit = 0.1,
+#                             expressionFamily = negbinomial.size())
+#   
+#   gbm_cds <- detectGenes(gbm_cds, min_expr = 0.1)
+#   
+#   
+#   
+#   print("remove non expressed genes in the subset (non expressed in at least X% of the cells X user option in monocle dp feature 5% in seurat tutorial 0,1%)")
+#   fData(gbm_cds)$use_for_seurat <- fData(gbm_cds)$num_cells_expressed > minPropCellExp * ncol(gbm_cds)
+#   
+#   gbm_to_seurat <- gbm_cds[fData(gbm_cds)$use_for_seurat==T,]
+#   gbm_to_seurat <- gbm_cds
+#   
+#   # Only needed if ensemble id to lazy to code the test for the moment 
+#   #rownames(gbm_to_seurat) <- make.unique(fData(gbm_to_seurat)$gene_short_name,sep = "_") #be careful in diff exp results for gene test enrichment
+#   
+#   if (is.element("Cluster",colnames(pData(gbm_to_seurat)))) {
+#     colnames(pData(gbm_to_seurat))[which(colnames(pData(gbm_to_seurat))=="Cluster")] <- "Cluster_monocle"
+#   }
+#   ##Convert to seurat
+#   seurat <- exportCDS(gbm_to_seurat,"Seurat")
+#   
+#   return(seurat)
+#   
+# }
+
+
+## DEPRECATED Seurat 2 Rename ident 
+
+# renameIdent <- function(seurat, old.ident.name,new.ident.name) {
+#   
+#   seurat@ident <- plyr::mapvalues(x = seurat@ident, from = old.ident.name, to = new.ident.name)
+#   seurat@ident <- factor(x = seurat@ident, levels = new.ident.name)
+#   
+#   return(seurat)
+# }
+
+
+
+
+getAGEPropPerClustBarplot <- function(hspc.combined) {
+  clusterAge <- ddply(hspc.combined@meta.data,~numclust + AGE,nrow)
+  
+  propExpect <- table(hspc.combined@meta.data$AGE)/length(hspc.combined@meta.data$AGE)[]
+  propYoungExp <- propExpect[[unique(hspc.combined@meta.data$AGE)[1]]]
+  
+  #clusterAGE$numclust <- factor(v$clusterNature , levels = c(""))
+  clusterAge$AGE <- factor(clusterAge$AGE , levels = c("Old","Young")) 
+  
+  
+  AGE <- ggplot(data.frame(clusterAge), aes(fill = AGE,y = V1, x=numclust,levels = "Young","Old")) +
+    geom_bar( stat="identity", position="fill")+
+    scale_fill_manual( values= rev(hue_pal()(length(unique(hspc.combined@meta.data$AGE)))))+
+    scale_y_continuous(name = "Age (%)", labels = c(0,25,50,75,100))+
+    ylab(label = "")+xlab(label = "") + coord_flip() + geom_hline(yintercept = propYoungExp)+
+    theme(legend.title=element_blank())
+  return(AGE)
+  
+}
+
+
+getSamplePropPerClustBarplot <- function(hspc.combined) {
+  clustersampleName <- ddply(hspc.combined@meta.data,~numclust + sampleName,nrow)
+  
+  propExpect <- table(hspc.combined@meta.data$sampleName)/length(hspc.combined@meta.data$sampleName)[]
+  propYoungExp <- propExpect[[unique(hspc.combined@meta.data$sampleName)[1]]]
+  
+  #clustersampleName$numclust <- factor(v$clusterNature , levels = c(""))
+  #clustersampleName$sampleName <- factor(clustersampleName$predicted , levels = c("")) 
+  
+  
+  sampleName <- ggplot(data.frame(clustersampleName), aes(fill = sampleName,y = V1, x=numclust)) +
+    geom_bar( stat="identity", position="fill")+
+    scale_fill_manual( values= rev(hue_pal()(length(unique(hspc.combined@meta.data$sampleName)))))+
+    scale_y_continuous(name = "Sample (%)", labels = c(0,25,50,75,100))+
+    ylab(label = "")+xlab(label = "") + coord_flip()+
+    theme(legend.title=element_blank()) 
+  return(sampleName)
+  
+}
+
+getRunDatePropPerClustBarplot <- function(hspc.combined) {
+  clusterrunDate <- ddply(hspc.combined@meta.data,~numclust + runDate,nrow)
+  
+  propExpect <- table(hspc.combined@meta.data$runDate)/length(hspc.combined@meta.data$runDate)[]
+  propYoungExp <- propExpect[[unique(hspc.combined@meta.data$runDate)[1]]]
+  
+  #clusterrunDate$numclust <- factor(v$clusterNature , levels = c(""))
+  #clusterrunDate$runDate <- factor(clusterrunDate$predicted , levels = c("")) 
+  
+  
+  runDate <- ggplot(data.frame(clusterrunDate), aes(fill = runDate,y = V1, x=numclust))  +
+    geom_bar( stat="identity", position="fill")+
+    scale_fill_manual( values= rev(hue_pal()(length(unique(hspc.combined@meta.data$runDate)))))+
+    scale_y_continuous(name = "Run date (%)", labels = c(0,25,50,75,100))+
+    ylab(label = "")+xlab(label = "") + coord_flip()+
+    theme(legend.title=element_blank())  
+  return(runDate)
+  
+}
+
+getPhasePropPerClustBarplot <- function(hspc.combined) {
+  clusterphases <- ddply(hspc.combined@meta.data,~numclust + phases,nrow)
+  
+  propExpect <- table(hspc.combined@meta.data$phases)/length(hspc.combined@meta.data$phases)[]
+  propYoungExp <- propExpect[[unique(hspc.combined@meta.data$phases)[1]]]
+  
+  #clusterphases$numclust <- factor(v$clusterNature , levels = c(""))
+  #clusterphases$phases <- factor(clusterphases$predicted , levels = c("")) 
+  
+  
+  phases <- ggplot(data.frame(clusterphases), aes(fill = phases,y = V1, x=numclust)) +
+    geom_bar( stat="identity", position="fill")+
+    scale_fill_manual( values= rev(hue_pal()(length(unique(hspc.combined@meta.data$phases)))))+
+    scale_y_continuous(name = "Cell cycle phase (%)", labels = c(0,25,50,75,100))+
+    ylab(label = "")+xlab(label = "") + coord_flip() +
+    theme(legend.title=element_blank())  
+  return(phases)
+  
+}
+
+
+getPredictedPropPerClustBarplot <- function(hspc.combined) {
+  clusterpredicted <- ddply(hspc.combined@meta.data,~numclust + predicted,nrow)
+  
+  propExpect <- table(hspc.combined@meta.data$predicted)/length(hspc.combined@meta.data$predicted)[]
+  propYoungExp <- propExpect[[unique(hspc.combined@meta.data$predicted)[1]]]
+  
+  #clusterpredicted$numclust <- factor(v$clusterNature , levels = c(""))
+  #clusterpredicted$predicted <- factor(clusterpredicted$predicted , levels = c("")) 
+  
+  
+  predicted <- ggplot(data.frame(clusterpredicted), aes(fill = predicted,y = V1, x=numclust)) +
+    geom_bar( stat="identity", position="fill")+
+    scale_fill_manual( values= hue_pal()(length(unique(hspc.combined@meta.data$predicted))))+
+    scale_y_continuous(name = "Cell type (%)", labels = c(0,25,50,75,100))+
+    ylab(label = "")+xlab(label = "") + coord_flip()+
+    theme(legend.title=element_blank())  
+  return(predicted)
+  
+}
+
+
+
+
+getEnrichAge <- function(hspc.combined,clustCol ='clusterName',metaCol = "age") {
+  
+  table <- table(hspc.combined@meta.data[,metaCol],hspc.combined@meta.data[,clustCol])
+  
+  #Remove null column in case of reclustering has been made
+  
+  table <- table[,as.vector(which(colSums(table)>0))]
+  
+  tablePercent <- prop.table(table,2)
+  
+  propExpect <- table(hspc.combined@meta.data[,metaCol])/length(hspc.combined@meta.data[,metaCol])
+  propExpectAge_1<- propExpect[[unique(hspc.combined@meta.data[,metaCol])[1]]]
+  propExpectAge_2<- propExpect[[unique(hspc.combined@meta.data[,metaCol])[2]]]
+  phyper <- rep(NA,length(colnames(table)))
+  enrich <- rep(NA,length(colnames(table)))
+  tablePercent <- rbind(tablePercent,enrich,phyper)
+  
+  
+  for (age in unique(hspc.combined@meta.data[,metaCol])) {
+    for (cluster in colnames(table)) {
+      if(tablePercent[age,cluster] > propExpect[[age]]) {
+        cells_pull_marked <- table[age,as.character(cluster)]
+        cells_pull <- as.numeric(colSums(table)[as.character(cluster)])
+        cells_marked_all <- rowSums(table)[age]
+        all_cells <- length(hspc.combined@meta.data[,metaCol])
+        
+        
+        
+        p.value <-  phyper(q=cells_pull_marked -1, 
+                           m=cells_marked_all,
+                           n=all_cells - cells_marked_all, k= cells_pull, lower.tail=FALSE)
+        
+        tablePercent["enrich",cluster] <- age
+        
+        tablePercent["phyper",cluster] <- p.value
+        
+      }
+    }
+  }
+  return(tablePercent)
+
+}
+
+
+
+
+
+
+# Make a summary teble of cluster metrics and signature enrichments
+
+getClustTable <- function(rodriguezSig,markers,signatures,seurat,outdir) {
+  clusterNames <- c("C1","C2","C3","Mk","Er","Ba","Neu","Mo1","Mo2", "preDC","preB","preT")
+  
+  RodriguezClustersSig <- lapply(X= c(1:length(clusterNames)),FUN = read_xlsx,path = rodriguezSig)
+  
+  names(RodriguezClustersSig) <- clusterNames 
+  
+  getOnlyPos <- function(clustersSig) {
+    clusterSig <- clustersSig[which(clustersSig$log.effect > 0),]
+    return(clusterSig)
+  }
+  
+  RodriguezClustersSigPos <- lapply(X= RodriguezClustersSig, getOnlyPos)
+  
+  
+  signaturesRodriguez <- lapply(RodriguezClustersSigPos,"[[",1 )
+  
+  
+  firstup <- function(x) {
+    substr(x, 1, 1) <- toupper(substr(x, 1, 1))
+    x
+  }
+  
+  colnames(markers) <- firstup(colnames(markers))
+  
+  getClustEnrichForRodriguez <- function(clust,signatures,seurat,markers) {
+    clustSig <- lapply(signatures,testHyperSig3,seurat,markers,clust)
+    propCellTypesLearned <- table(seurat@meta.data[which(seurat@meta.data$numclust==clust),"predicted"])/length(seurat@meta.data[which(seurat@meta.data$numclust==clust),"predicted"])
+    clustInfo <- c(clustSig,propCellTypesLearned)
+    return(clustInfo)
+  }
+  
+  clust_list <- lapply(unique(markers$Cluster),getClustEnrichForRodriguez,signature=signaturesRodriguez,seurat =seurat,markers =markers) 
+  
+  names(clust_list) <- paste("cluster_",unique(markers$Cluster),sep="")
+  
+  clust_table <- as.data.frame(matrix(unlist(clust_list), nrow=length(unlist(clust_list[1]))))
+  colnames(clust_table) <- names(clust_list)
+  rownames(clust_table) <- names(clust_list[[1]])
+  
+  clust_df <- as.data.frame(t(clust_table))
+  
+  write.csv(clust_df,file = paste(outdir,"/clustInfoRodriguez.csv",sep =""),quote = F)
+  
+  
+  
+  #Clusters table
+  
+  clust_table <- data.frame()
+  
+  print("begin cluster table")
+  
+  getClustInfo <- function(clust,signatures,seurat,markers) { 
+
+    clustInfo <- list()
+    clustInfo$num_cells <- dim(seurat@meta.data[which(seurat@active.ident==clust),])[1]
+    clustInfo$percent_cells <- clustInfo$num_cells/dim(seurat@meta.data)[1]
+    percentPhases <- table(seurat@meta.data[which(seurat@active.ident==clust),"phases"])/length(seurat@meta.data[which(seurat@active.ident==clust),"phases"]) #In fact this is fraction not percentage
+    
+    if(!is.null(seurat@meta.data$predicted)) {
+      percentPredicted <- table(seurat@meta.data[which(seurat@active.ident==clust),"predicted"])/length(seurat@meta.data[which(seurat@active.ident==clust),"predicted"]) #In fact this is fraction not percentage
+      
+      for (p in unique(seurat@meta.data$predicted)) {
+        print(p)
+        if (is.element(p,names(percentPredicted))) {
+          clustInfo[[p]] <- percentPredicted[p] 
+        } else {
+          clustInfo[[p]] <- 0
+        }
+      }
+      
+    }
+    
+    if(!is.null(seurat@meta.data$AGE)) {
+      percentAGE <- table(seurat@meta.data[which(seurat@active.ident==clust),"AGE"])/length(seurat@meta.data[which(seurat@active.ident==clust),"AGE"]) #In fact this is fraction not percentage
+      
+      for (p in unique(seurat@meta.data$AGE)) {
+        print(p)
+        if (is.element(p,names(percentAGE))) {
+          clustInfo[[p]] <- percentAGE[p] 
+        } else {
+          clustInfo[[p]] <- 0
+        }
+      }
+      
+    }
+    
+    if(!is.null(seurat@meta.data$sampleName)) {
+      percentsampleName <- table(seurat@meta.data[which(seurat@active.ident==clust),"sampleName"])/length(seurat@meta.data[which(seurat@active.ident==clust),"sampleName"]) #In fact this is fraction not percentage
+      
+      for (p in unique(seurat@meta.data$sampleName)) {
+        print(p)
+        if (is.element(p,names(percentsampleName))) {
+          clustInfo[[p]] <- percentsampleName[p] 
+        } else {
+          clustInfo[[p]] <- 0
+        }
+      }
+      
+    }
+    
+    
+    for (p in c("G1_G0","S","G2_M")) {
+      if (is.element(p,names(percentPhases))) {
+        clustInfo[[p]] <- percentPhases[p] 
+      } else {
+        clustInfo[[p]] <- 0
+      }
+    }
+    
+    
+    clustInfo$median_genes_expressed <- median(seurat@meta.data[which(seurat@active.ident==clust),"numGenesPerCells"])
+    clustInfo$median_nUMI <- median(seurat@meta.data[which(seurat@active.ident==clust),"Total_mRNAs"])
+    clustInfo$median_percentMitochGenes <- median(seurat@meta.data[which(seurat@active.ident==clust),"percentMito"])
+    
+    
+    clustSig <- lapply(signatures,testHyperSig3,seurat,markers,clust) 
+    
+    clustInfo <- c(clustInfo,clustSig)
+    
+    
+  }
+  
+  allSignatures <- c(signatures,signaturesRodriguez)
+  
+  clust_list <- lapply(levels(unique(seurat@active.ident)),getClustInfo,allSignatures,seurat,markers) 
+  
+  names(clust_list) <- paste("cluster_",levels(unique(seurat@active.ident)),sep="")
+  
+  print("clust_list ok")
+  saveRDS(clust_list,paste(outdir,"/clust_list_save.rds",sep =""))
+  
+  clust_table <- as.data.frame(matrix(unlist(clust_list), nrow=length(unlist(clust_list[1]))))
+  colnames(clust_table) <- names(clust_list)
+  rownames(clust_table) <- names(clust_list[[1]])
+  
+  print("clust_table ok")
+  
+  clust_df <- as.data.frame(t(clust_table))
+  
+  write.table(x = clust_df,file = paste(outdir,"/clusters_table.tsv",sep =""),sep="\t",quote=F,col.names = NA)
+  
+  return(clust_df)
+}
+
+  
diff --git a/R_src/funForSeuratAnalysis.R b/R_src/funForSeuratAnalysis.R
new file mode 100644
index 0000000000000000000000000000000000000000..e41c567dbfcdd97bad397d064f06fa798a527b8e
--- /dev/null
+++ b/R_src/funForSeuratAnalysis.R
@@ -0,0 +1,71 @@
+# Do hypergeometric test of to population to find if ine proportion is significantly increased
+getEnrichAge <- function(hspc.combined,clustCol ='clusterName',metaCol = "age") {
+  
+  table <- table(hspc.combined@meta.data[,metaCol],hspc.combined@meta.data[,clustCol])
+  
+  #Remove null column in case of reclustering has been made
+  
+  table <- table[,as.vector(which(colSums(table)>0))]
+  
+  tablePercent <- prop.table(table,2)
+  
+  propExpect <- table(hspc.combined@meta.data[,metaCol])/length(hspc.combined@meta.data[,metaCol])
+  propExpectAge_1<- propExpect[[unique(hspc.combined@meta.data[,metaCol])[1]]]
+  propExpectAge_2<- propExpect[[unique(hspc.combined@meta.data[,metaCol])[2]]]
+  phyper <- rep(NA,length(colnames(table)))
+  enrich <- rep(NA,length(colnames(table)))
+  tablePercent <- rbind(tablePercent,enrich,phyper)
+  
+  
+  for (age in unique(hspc.combined@meta.data[,metaCol])) {
+    for (cluster in colnames(table)) {
+      if(tablePercent[age,cluster] > propExpect[[age]]) {
+        cells_pull_marked <- table[age,as.character(cluster)]
+        cells_pull <- as.numeric(colSums(table)[as.character(cluster)])
+        cells_marked_all <- rowSums(table)[age]
+        all_cells <- length(hspc.combined@meta.data[,metaCol])
+        
+        
+        
+        p.value <-  phyper(q=cells_pull_marked -1, 
+                           m=cells_marked_all,
+                           n=all_cells - cells_marked_all, k= cells_pull, lower.tail=FALSE)
+        
+        tablePercent["enrich",cluster] <- age
+        
+        tablePercent["phyper",cluster] <- p.value
+        
+      }
+    }
+  }
+  return(tablePercent)
+  
+}
+
+# Hypergeometric test and plotting the result with a bp function, use a getEnrichAge
+getEnrichPopClust <- function(hspc.combined, Xname, Yname, colorX, colorY, metaCol = "AGE", clustCol = "numclust"){
+  conditionEnrich <- getEnrichAge(hspc.combined = hspc.combined,clustCol = clustCol, metaCol = metaCol)
+  conditionEnrich <- as.data.frame(t(conditionEnrich))
+  conditionEnrich$color <- "black"
+  conditionEnrich[which(as.numeric(as.vector(conditionEnrich$phyper)) < 0.01 & conditionEnrich$enrich == Xname),"color"] <- colorX
+  conditionEnrich[which(as.numeric(as.vector(conditionEnrich$phyper)) < 0.01 & conditionEnrich$enrich == Yname),"color"] <- colorY
+  return(conditionEnrich)
+}
+
+# Function to calculate the score diff of a signature between 2 cells pop
+featureDiff <- function(seurat,cells.1,cells.2,feature) {
+  data <- GetAssayData(seurat,slot = "data")
+  total.diff <- mean(data[feature,cells.1]) - mean(data[feature,cells.2])
+  return(total.diff)
+} 
+
+convertHumanGeneList <- function(x){
+  require("biomaRt")
+  human = useMart("ensembl", dataset = "hsapiens_gene_ensembl")
+  mouse = useMart("ensembl", dataset = "mmusculus_gene_ensembl")
+  genesV2 = getLDS(attributes = c("hgnc_symbol"), filters = "hgnc_symbol", values = x , mart = human, attributesL = c("mgi_symbol"), martL = mouse, uniqueRows=T)
+  humanx <- unique(genesV2[, 2])
+  # Print the first 6 genes found to the screen
+  print(head(humanx))
+  return(humanx)
+}
diff --git a/R_src/getSigPerCells.R b/R_src/getSigPerCells.R
new file mode 100644
index 0000000000000000000000000000000000000000..af7e0d968794ce4190a05ab3da542223662fa500
--- /dev/null
+++ b/R_src/getSigPerCells.R
@@ -0,0 +1,193 @@
+getSignatures <- function(m,pheno,cellType,data,id_type = "gene_short_name",outdir="./",padj = 0.05) {
+  phenoTest <- pheno
+  phenoTest[which(phenoTest!=cellType)] <- "othersHSPC"
+  cat("Calling differentially expressed genes (DESeq2\n")
+  des <- DESeqDataSetFromMatrix(m, as.data.frame(phenoTest), design=formula(~phenoTest))
+  dds <- DESeq(des,parallel=T)
+  res <- results(dds,contrast=c("phenoTest",cellType,"othersHSPC"))
+  resOrdered <- res[order(res$padj),]
+  #save.image("image.Rdata")
+  out <- as.data.frame(resOrdered)
+  #resMLE<- results(dds, addMLE=TRUE)
+  res <- results(dds)
+  resOrdered <- res[order(res$padj),]
+  #save.image("image.Rdata")
+  out <- as.data.frame(resOrdered)
+  log2.counts <- log2(counts(dds, normalized=TRUE) + 1)
+  colnames(log2.counts) <- colnames(m)
+  out <- data.frame(out, log2.counts[rownames(out),],data[rownames(out),"Gene.Symbol"])
+  colnames(out)[length(colnames(out))] = "gene_short_name"
+  out_save <- out[which(out$padj < 0.05 & out$log2FoldChange<0),]
+  out_save <- out_save[order(out_save$padj),]
+  write.table(out_save,paste(outdir,"/",paste(cellType,"vs",paste(unique(pheno[which(pheno!=cellType)]),collapse = ""),sep = "_"),".tsv",sep =""),sep="\t",quote=F,col.names = NA)
+  
+  gene_sig <- as.vector(out_save[which(out_save$padj<padj),id_type])
+  return(list(genes = gene_sig, table = out))
+}
+
+
+
+AddSigScoreMonocle <- function(gbm_cds,ntop,sigRes,cellTypeScore) {
+  
+  seurat <- exportCDS(gbm_cds,"Seurat")
+
+  sigRes <- sigRes[which(sigRes$padj < 0.05),]
+  
+  print(head(sigRes))
+
+  gene_sig <- rownames(sigRes[which(sigRes$log2FoldChange<0),])[c(1:50)]
+  
+  print(head(gene_sig))
+
+  seurat <- AddModuleScore(seurat, genes.list = list(gene_sig), genes.pool = NULL, n.bin = 25,
+                  seed.use = 1, ctrl.size = 100, use.k = FALSE,
+                  random.seed = 1)
+  print("calcul ok")
+  print(head(seurat@meta.data))
+  pData(gbm_cds)$newScore <- seurat@meta.data[,length(colnames(seurat@meta.data))]
+  print("ok)")
+  colnames(pData(gbm_cds))[length(colnames(pData(gbm_cds)))] <- cellTypeScore
+  print("ok2")
+  print(plot_cell_trajectory(gbm_cds,color_by=cellTypeScore) + scale_color_gradient( low="grey",high="red"))
+  return(gbm_cds)
+}
+
+
+
+AddSigScore <- function(seurat,ntop,sigRes,scoreName) {
+  
+  sigRes <- sigRes[which(sigRes$padj < 0.05),]
+  
+  print(head(sigRes))
+  
+  gene_sig <- sigRes[which(sigRes$log2FoldChange<0),"gene_short_name"][c(1:50)]
+  
+  print(head(gene_sig))
+  
+  seurat <- AddModuleScore(seurat, genes.list = list(gene_sig), genes.pool = NULL, n.bin = 25,
+                           seed.use = 1, ctrl.size = 100, use.k = FALSE, enrich.name= scoreName,
+                           random.seed = 1)
+  print("calcul ok")
+  colnames(seurat@meta.data)[length(colnames(seurat@meta.data))] <- scoreName
+  print(head(seurat@meta.data))
+  return(seurat)
+}
+
+getKeggSig <- function(queryIndex) {
+  fullName <- queryIndex$NAME
+  name <- strsplit(fullName,split = " ")[[1]][1]
+  name <- gsub(name,pattern = "-",replacement = "_")
+  name <- paste(name,"_Kegg",sep ="")
+  print(name)
+  result <- data.frame(strsplit(queryIndex$GENE,";"))
+  result_t <- t(result)
+  result_vector <- as.vector(result_t[,1])
+  result_genes <- result_vector[!grepl("^[0-9]{1,}$", result_vector)]
+  
+  return(list(name = name,genes = result_genes))
+}
+
+getKeggSigList <- function(query) {
+  results <- lapply(query,getKeggSig)
+  names(results) <- unlist(lapply(results,'[[',1))
+  return(results)
+}
+
+
+getMicroArraySig <- function(file) {
+  arraySigName <- paste(strsplit(file,split="_")[[1]][2],"_Chambers",sep="")
+  arraySig <- as.vector(na.omit(read.table(paste(opt$input_microArraySigDir,file,sep="/"),sep =";",header =T,quote = "\"")$Gene.Symbol))
+  return(list(name = arraySigName,genes = arraySig))
+}
+
+getMicroArraySigList <- function(fileList) {
+  results <- lapply(fileList,getMicroArraySig)
+  names(results) <- unlist(lapply(results,'[[',1))
+  return(results)
+}
+
+
+read_excel_allsheets <- function(filename, tibble = FALSE) {
+  sheets <- readxl::excel_sheets(filename)
+  x <- lapply(sheets, function(X) readxl::read_excel(filename, sheet = X))
+  if(!tibble) x <- lapply(x, as.data.frame)
+  names(x) <- sheets
+  x
+}
+
+
+getGeneNameCol <- function(sheet) {
+  result <- sheet[,"Gene Symbol"]
+  result <- result[which(result != "NA")]
+  return(result)
+}
+
+
+getMicroArraySigListXls <- function(allsheets) {
+  allsheets <- allsheets[-1]
+  results <- lapply(allsheets,getGeneNameCol)
+  names(results) <-paste(gsub("\\.txt","",names(allsheets)),"_Chambers",sep="")
+  return(results)
+}
+
+
+
+
+
+getBM_vector <- function(goTerm,idType = "external_gene_name",mart) {
+  result <- getBM(attributes=c("ensembl_gene_id","external_gene_name","name_1006"),
+                  filters=c("go"),
+                  values=list(goTerm),mart=mart)
+  head(result)
+  out <- unique(result[,idType])
+  name <- gsub(Term(goTerm),pattern=" |-", replacement= "_")
+  name <- paste(name,"_GO",sep="")
+  print(name)
+  return(list(name = name,genes = out))
+}
+
+#Function to score cell in seurat with a gene signature
+
+# DEPRECATED Seurat 2
+# scoreCells <- function(seurat,signature,outdir,sigName) {
+#   #remove old slot
+#   if (!is.null(seurat@meta.data[[sigName]])) {
+#     seurat@meta.data[[sigName]] <- NULL
+#   }
+#   seurat <- AddModuleScore(seurat, genes.list = list(signature), genes.pool = NULL, n.bin = 25,
+#                            seed.use = 1, ctrl.size = 100, use.k = FALSE, enrich.name= sigName,
+#                            random.seed = 1)
+#   colnames(seurat@meta.data)[length(seurat@meta.data)] <- sigName
+#   #print(head(seurat@meta.data))
+#   if(!is.null(outdir)) {
+#   #print("plot")
+#   png(paste(outdir,"/",sigName,".png",sep =""))
+#   FeaturePlot(seurat,features.plot=sigName)
+#   dev.off()
+#   }
+#   return(seurat)
+#   
+# }
+
+#For seurat3
+
+scoreCells3 <- function(seurat,signature,outdir,sigName) {
+  #remove old slot
+  if (!is.null(seurat@meta.data[[sigName]])) {
+    seurat@meta.data[[sigName]] <- NULL
+  }
+  seurat <- AddModuleScore(seurat, features = list(signature), pool = NULL, nbin = 25,
+                           seed = 1, ctrl = 100, k = FALSE, name= sigName)
+  colnames(seurat@meta.data)[length(seurat@meta.data)] <- sigName
+  #print(head(seurat@meta.data))
+  if(!is.null(outdir)) {
+    #print("plot")
+    png(paste(outdir,"/",sigName,".png",sep =""))
+    plot(FeaturePlot(seurat,features=sigName))
+    dev.off()
+  }
+  return(seurat)
+  
+}
+## Test cell signature
+
diff --git a/R_src/getSignaturesCL.R b/R_src/getSignaturesCL.R
new file mode 100644
index 0000000000000000000000000000000000000000..dec219d714d48a770cc221bc38854e7099657d2c
--- /dev/null
+++ b/R_src/getSignaturesCL.R
@@ -0,0 +1,101 @@
+##------------------------------------------------------------------------------
+## L. Herault
+##------------------------------------------------------------------------------
+
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+
+suppressMessages(library(getopt))
+suppressMessages(library(readxl))
+
+
+source("R_src/getSigPerCells.R")
+
+
+
+# Store previously published signature in an R object
+
+## -----------------------------------------------------------------------------
+## Command line args
+## -----------------------------------------------------------------------------
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",	"Help about the program",
+  'input_tabfile', 't', 1, "character","tab file with gene lists per column (Geiger signatures tab file)",
+  'input_microArraySig',"m", 1, "character", "directory  with micro array cell type signatures (Chambers) or directly xlsx file from cell paper",
+  'outdir',      'o', 1, "character", 	"Output directory. Default to current working directory."),
+  byrow=TRUE, ncol=5)
+
+opt = getopt(spec)
+
+# if help was asked, print a friendly message
+# and exit with a non-zero error code
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$input_tabfile) | is.null(opt$input_microArraySig)) {
+  cat("get signatures with microo array data and/or tab files from previous studies\n")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+
+
+sessionInfo()
+
+# output
+if(is.null(opt$outdir))
+  opt$outdir <- getwd()
+
+dir.create(opt$outdir, showWarnings=FALSE)
+
+#Miccro array sig
+
+if (dir.exists(opt$input_microArraySig)) {
+  arraySigFiles <- list.files(opt$input_microArraySig,full.names = F)
+  print(arraySigFiles)
+  arraySig <- getMicroArraySigList(arraySigFiles)
+  
+  
+  
+  allSignatures <- c(arraySig)
+  
+  allSignatures <- lapply(allSignatures,'[[',2)
+} else {
+  
+  # Miccro array sig directly from xlsx file
+  allsheets <- read_excel_allsheets(opt$input_microArraySig)
+  
+  allSignatures <- getMicroArraySigListXls(allsheets)
+  
+}
+
+#Tab file signatures
+
+if(endsWith(opt$input_tabfile,suffix = ".xls")) { #if downloaded file
+  tab_sig <- read_xls(opt$input_tabfile)
+  tab_sig <- as.data.frame(tab_sig)
+  tab_sig <- tab_sig[-2,]
+  colnames(tab_sig) <- tab_sig[1,]
+  tab_sig <- tab_sig[-1,]
+} else {
+  tab_sig <- read.table(opt$input_tabfile,sep = "\t",header=T)
+  
+}
+
+
+nonEmptyString <- function(stringVector){
+  result <- which(stringVector != "")
+  return(stringVector[result])
+}
+
+list_sig <- as.list(tab_sig)
+
+list_sig <- lapply(list_sig,nonEmptyString)
+
+
+allSignatures <- c(allSignatures,list_sig)
+
+
+
+saveRDS(allSignatures,paste(opt$outdir,"publicSignatures.rds",sep = ""))
+
diff --git a/R_src/installMotifsEnv.R b/R_src/installMotifsEnv.R
new file mode 100644
index 0000000000000000000000000000000000000000..5a491a311d4e757bb2adb925c8caa3c6c353afba
--- /dev/null
+++ b/R_src/installMotifsEnv.R
@@ -0,0 +1,47 @@
+if (!requireNamespace("BiocManager", quietly = TRUE))
+  install.packages("BiocManager")
+print("Install devtools")
+install.packages("devtools",repos = "http://cran.us.r-project.org")
+
+print("Install last signac version")
+#devtools::install_github("timoast/signac", ref = "develop")
+
+BiocManager::install("chromVAR")
+
+BiocManager::install("motifmatchr")
+
+# BiocManager::install("BSgenome.Mmusculus.UCSC.mm10")
+
+# BiocManager::install("TFBSTools")
+
+
+# To automatically install Bioconductor dependencies
+setRepositories(ind=1:2)
+
+# BiocManager::install("GenomeInfoDb")
+
+# BiocManager::install("EnsDb.Mmusculus.v79")
+
+BiocManager::install("ggbio")
+
+#BiocManager::install("EnsDb.Mmusculus.v79")
+
+
+install.packages("Signac",repos='http://cran.us.r-project.org')
+#install.packages("devtools")
+#devtools::install_github("timoast/signac", ref = "develop")
+#packageVersion("Signac") 
+
+# remotes::install_github("da-bar/JASPAR2020")
+
+# remotes::install_github("satijalab/seurat", ref = "release/4.0.0")
+
+remotes::install_github("jlmelville/uwot")
+
+# library(BSgenome.Mmusculus.UCSC.mm10)
+# library(TFBSTools)
+# library(GenomeInfoDb)
+# library(EnsDb.Mmusculus.v79)
+# library(ggbio)
+# library(Signac)
+
diff --git a/R_src/installSignacSeurat4.R b/R_src/installSignacSeurat4.R
new file mode 100644
index 0000000000000000000000000000000000000000..7a320e64f519168461d953f349a7983b3bf1eb6a
--- /dev/null
+++ b/R_src/installSignacSeurat4.R
@@ -0,0 +1,30 @@
+# Install bioconductor
+
+if (!requireNamespace("BiocManager", quietly = TRUE))
+  install.packages("BiocManager",repos='http://cran.us.r-project.org')
+
+# To automatically install Bioconductor dependencies
+setRepositories(ind=1:2)
+
+# BiocManager::install("GenomeInfoDb")
+
+# BiocManager::install("EnsDb.Mmusculus.v79")
+
+BiocManager::install("ggbio")
+
+#BiocManager::install("EnsDb.Mmusculus.v79")
+
+
+install.packages("Signac",repos='http://cran.us.r-project.org')
+
+
+# remotes::install_github("satijalab/seurat", ref = "release/4.0.0")
+# remotes::install_version("Seurat", version = "4.0.3")
+
+remotes::install_github("jlmelville/uwot")
+
+
+
+
+
+
diff --git a/R_src/load_10X_Seurat.R b/R_src/load_10X_Seurat.R
new file mode 100644
index 0000000000000000000000000000000000000000..cc7facd1f4f3840a04580e2d1c6a01a84d79c306
--- /dev/null
+++ b/R_src/load_10X_Seurat.R
@@ -0,0 +1,124 @@
+##------------------------------------------------------------------------------
+## L. Herault
+##------------------------------------------------------------------------------
+
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+
+
+suppressMessages(library("getopt"))
+suppressMessages(library("Matrix"))
+suppressMessages(library("Seurat"))
+
+source("R_src/funForLoading.R")
+
+
+## -----------------------------------------------------------------------------
+## Command line args
+## -----------------------------------------------------------------------------
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'inputMatrixDir', 'b', 1, "character", "REQUIRED:10X expression data (matrix files directory path)",
+  'outfile',     'f',1, "character", 'Output file path (default .10X_data.rds)',
+  'cellranger',   'c',1, "character", "cellranger version used to generate matrix files (default 2)",
+  "subSample",    "p",1, "character", "proportion of cell to subsample (by default all the cells are used (e.g. 0.5))",
+  "sampleInfo",   "s", 1, "character", "sample information that will be added to pData with the following format: age=2_months,runDate=10_12_2017..",
+  "sampleName",   "n", 1, "character", "if matrix file have the sampl name as prefix (name_matrix.mtx...) provide it here"
+  
+), byrow=TRUE, ncol=5);
+
+opt = getopt(spec)
+
+# if help was asked, print a friendly message
+# and exit with a non-zero error code
+# For test
+# opt <- list()
+# opt$inputMatrixDir <- "input/RAW/PLZF_RARA_CT/"
+# opt$cellranger <- 4
+# opt$sampleInfo <- "age=Adult,runDate=11/2020,sampleName=RNA_Ctrl"
+# opt$outfile <- "input/10X_data_RNA_Ctrl.rds"
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputMatrixDir)) {
+  cat("loading 10X genomics single cells data and save it as a seurat R object.")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+## -----------------------------------------------------------------------------
+## Processing data
+## -----------------------------------------------------------------------------
+
+# output
+
+if(is.null(opt$outfile)) {
+  opt$outfile <- "10X_data.rds"
+  outdir = './'
+} else {
+  outdir <- paste(strsplit(opt$outfile,"/")[[1]][-length(strsplit(opt$outfile,"/")[[1]])],collapse="/")
+  dir.create(outdir, showWarnings=FALSE)
+}
+
+
+if(is.null(opt$cellranger)) {
+  opt$cellranger <- "2"
+}
+
+
+if (opt$cellranger == "3" | opt$cellranger == "4") {
+  gbm <- loadCellRangerMatrix_cellranger3(opt$inputMatrixDir,sample_name=opt$sampleName)
+} else
+{
+  gbm <- loadCellRangerMatrix(opt$inputMatrixDir,sample_name=opt$sampleName)
+}
+
+
+fd <- gbm$fd
+pd <- gbm$pd
+
+# Column 'symbol' is the one (from cellRanger workflow) that corresponds to featureData's gene short names.
+
+colnames(fd)[which(colnames(fd)=="symbol")]<- "gene_short_name"
+
+for (c in c(1:length(colnames(fd)))){
+  if (is.na(colnames(fd)[c])) {
+    colnames(fd)[c] <- paste0("col_",c)
+  }
+}
+
+
+seurat <- CreateSeuratObject(counts = gbm$exprs,
+                             assay = "RNA",
+                             meta.data = gbm$pd)
+
+# ## Subsample ## DEPRECATED
+# if(is.null(opt$subSample)==F) {
+#   opt$subSample <- as.numeric(opt$subSample)
+#   print(opt$subSample)
+#   
+#   cellSubset <- sample(rownames(pData(gbm_cds)),size = opt$subSample*length(rownames(pData(gbm_cds))))
+#   gbm_cds <- gbm_cds[,cellSubset] 
+# }
+
+
+#Add sample infos
+
+print(opt$sampleInfo)
+sampleInfos <- strsplit(opt$sampleInfo,split=",")[[1]]
+for (i in sampleInfos) {
+  print(i)
+  info <- strsplit(i,split = "=")[[1]][1]
+  print(info)
+  value <- strsplit(i,split = "=")[[1]][2]
+  print(value)
+  seurat@meta.data[,info] <- value
+}
+
+
+output <- list(seurat = seurat,featureData = fd)
+
+saveRDS(output,file = opt$outfile)
+
+
diff --git a/R_src/load_10X_data.R b/R_src/load_10X_data.R
new file mode 100644
index 0000000000000000000000000000000000000000..f32a6c19c47905bef393f7f536a8b3e7c4754fa0
--- /dev/null
+++ b/R_src/load_10X_data.R
@@ -0,0 +1,112 @@
+##------------------------------------------------------------------------------
+## L. Herault
+##------------------------------------------------------------------------------
+
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+
+
+suppressMessages(library("getopt"))
+suppressMessages(library("Matrix"))
+suppressMessages(library("monocle"))
+
+source("R_src/funForLoading.R")
+
+
+## -----------------------------------------------------------------------------
+## Command line args
+## -----------------------------------------------------------------------------
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'inputMatrixDir', 'b', 1, "character", "REQUIRED:10X expression data (matrix files directory path)",
+  'outfile',     'f',1, "character", 'Output file path (default .10X_data.rds)',
+  'cellranger',   'c',1, "character", "cellranger version used to generate matrix files (default 2)",
+  "subSample",    "p",1, "character", "proportion of cell to subsample (by default all the cells are used (e.g. 0.5))",
+  "sampleInfo",   "s", 1, "character", "sample information that will be added to pData with the following format: age=2_months,runDate=10_12_2017..",
+  "sampleName",   "n", 1, "character", "if matrix file have the sampl name as prefix (name_matrix.mtx...) provide it here"
+  
+), byrow=TRUE, ncol=5);
+
+opt = getopt(spec)
+
+# if help was asked, print a friendly message
+# and exit with a non-zero error code
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputMatrixDir)) {
+  cat("loading 10X genomics single cells data and save it as a monocle R object.")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+## -----------------------------------------------------------------------------
+## Processing data
+## -----------------------------------------------------------------------------
+
+# output
+if(is.null(opt$outfile)) {
+  opt$outfile <- "10X_data.rds"
+} else {
+  outdir <- paste(strsplit(opt$outfile,"/")[[1]][-length(strsplit(opt$outfile,"/")[[1]])],collapse="/")
+  dir.create(outdir, showWarnings=FALSE)
+}
+
+
+if(is.null(opt$cellranger)) {
+  opt$cellranger <- "2"
+}
+
+
+if (opt$cellranger == "3" | opt$cellranger == "4") {
+  gbm <- loadCellRangerMatrix_cellranger3(opt$inputMatrixDir,sample_name=opt$sampleName)
+} else
+{
+  gbm <- loadCellRangerMatrix(opt$inputMatrixDir,sample_name=opt$sampleName)
+}
+
+
+fd <- gbm$fd
+pd <- gbm$pd
+
+# Column 'symbol' is the one (from cellRanger workflow) that corresponds to featureData's gene short names.
+
+colnames(fd)[which(colnames(fd)=="symbol")]<- "gene_short_name"
+
+for (c in c(1:length(colnames(fd)))){
+  if (is.na(colnames(fd)[c])) {
+    colnames(fd)[c] <- paste0("col_",c)
+  }
+}
+
+gbm_cds <- newCellDataSet(gbm$exprs,
+                          phenoData = new("AnnotatedDataFrame", data = pd),
+                          featureData = new("AnnotatedDataFrame", data = fd),
+                          expressionFamily = negbinomial.size())
+
+## Subsample
+if(is.null(opt$subSample)==F) {
+  opt$subSample <- as.numeric(opt$subSample)
+  print(opt$subSample)
+  
+  cellSubset <- sample(rownames(pData(gbm_cds)),size = opt$subSample*length(rownames(pData(gbm_cds))))
+  gbm_cds <- gbm_cds[,cellSubset] 
+}
+
+
+#Add sample infos
+
+print(opt$sampleInfo)
+sampleInfos <- strsplit(opt$sampleInfo,split=",")[[1]]
+for (i in sampleInfos) {
+  print(i)
+  info <- strsplit(i,split = "=")[[1]][1]
+  print(info)
+  value <- strsplit(i,split = "=")[[1]][2]
+  print(value)
+  pData(gbm_cds)[[info]] <- value
+}
+
+saveRDS(gbm_cds,file = opt$outfile)
+
diff --git a/R_src/makeBranchedHeatmap.R b/R_src/makeBranchedHeatmap.R
new file mode 100644
index 0000000000000000000000000000000000000000..b1cb8db38130e1514fe0523ff1920105a0bee829
--- /dev/null
+++ b/R_src/makeBranchedHeatmap.R
@@ -0,0 +1,267 @@
+regulons_heatmap2 <- function(monocle, 
+                              regulons,
+                              first_traj_states=c(1,2), 
+                              second_traj_states=c(1,3),
+                              cluster=3,
+                              legend=TRUE,
+                              add_annotation_row = F,
+                              showTree = F,
+                              clusterRow =T,
+                              gaps_row = NULL,
+                              legend_breaks = NA,
+                              clusterFirstData = T,
+                              colorStates = NULL,
+                              breaksAdapt = FALSE,
+                              clustering_method = "complete"){
+  
+  
+  rownames(regulons) <- regulons[,1]
+  regulons <- regulons[,-1]
+  regulons_pos <- regulons[,which(grepl(x = colnames(regulons),pattern = "\\(n\\)|(\\+)"))]
+  
+  metaData_monocle <- pData(monocle)
+  breaklist <- seq(-5,5, length.out=100)
+  
+  
+  getMerge <- function(metaData_monocle,breaklist) {
+    
+    first_traj <- metaData_monocle[which(metaData_monocle$State %in% first_traj_states),]
+    
+    second_traj <- metaData_monocle[which(metaData_monocle$State %in% second_traj_states),]
+    
+    regulons_fit_first <- regulons_pos[rownames(first_traj),]
+    regulons_fit_second <- regulons_pos[rownames(second_traj),]
+    
+    for(i in colnames(regulons_pos)){
+      if(!unique(is.na(second_traj[,i]))) {
+        set.seed(1)
+        gam_second <-gam(formula = second_traj[,i] ~ s(Pseudotime, bs ="cr"), data= second_traj)
+        regulons_fit_second[,i] <- gam_second$fitted.values
+        gam_first <-gam(formula = first_traj[,i] ~ s(Pseudotime, bs ="cr"), data= first_traj)
+        
+        # gam_second <-gam(formula = second_traj[,i] ~ s(Pseudotime,bs = "cs"), data= second_traj)
+        # regulons_fit_second[,i] <- gam_second$fitted.values
+        # gam_first <-gam(formula = first_traj[,i] ~ s(Pseudotime,bs = "cs"), data= first_traj)
+        
+        regulons_fit_first[,i] <- gam_first$fitted.values
+      } else {
+        regulons_fit_second[,i] <- rep(NA,dim(second_traj)[1])
+        regulons_fit_first[,i] <- rep(NA,dim(first_traj)[1])
+      }
+      
+    }
+    regulons_fit_second <- data.frame(regulons_fit_second, check.names = FALSE)
+    
+    regulons_fit_first <- data.frame(regulons_fit_first, check.names = FALSE)
+    
+    
+    max_pseudotime <- max(c(max(second_traj$Pseudotime), max(first_traj$Pseudotime)))
+    metaData_monocle$percent_Pseudotime <- 100*metaData_monocle$Pseudotime/max_pseudotime
+    second_traj$percent_Pseudotime <- 100*second_traj$Pseudotime/max_pseudotime
+    first_traj$percent_Pseudotime <- 100*first_traj$Pseudotime/max_pseudotime
+    
+    percent_state_second <- c()
+    
+    for (s in second_traj_states){
+      percent_state <- floor((max(second_traj[which(second_traj$State == s), "Pseudotime"])*100)/max_pseudotime)
+      percent_state_second <- c(percent_state_second,percent_state)
+    }
+    
+    percent_state_second[length(percent_state_second)] <- 100 #set max at 100 it will be cut if necessary in the loop
+    
+    newdata_second <- data.frame(Pseudotime = seq(1, 100, by = 1),
+                                 Branch = as.factor(rep(c(second_traj_states),c(diff(c(0,percent_state_second))))))
+    
+    
+    percent_state_first <- c()
+    for (s in first_traj_states){
+      percent_state <- floor((max(first_traj[which(first_traj$State == s), "Pseudotime"])*100)/max_pseudotime)
+      percent_state_first <- c(percent_state_first,percent_state)
+    }
+    
+    percent_state_first[length(percent_state_first)] <- 100 #set max at 100 it will be cut if necessary in the loop
+    
+    
+    newdata_first <- data.frame(Pseudotime = seq(1, 100, by = 1),
+                                Branch = as.factor(rep(c(first_traj_states),c(diff(c(0,percent_state_first))))))
+    
+    
+    #Setting up the object for the score
+    cell_names_second <- list() # not used apparantly
+    regulons_score_percent_pseudotime_second <- data.frame(matrix(NA, nrow = 100, ncol=ncol(regulons_fit_second)))
+    colnames(regulons_score_percent_pseudotime_second) <- colnames(regulons_fit_second)
+    
+    cell_names_first <- list()
+    regulons_score_percent_pseudotime_first <- data.frame(matrix(NA, nrow = 100, ncol=ncol(regulons_fit_first)))
+    colnames(regulons_score_percent_pseudotime_first) <- colnames(regulons_fit_first)
+    
+    #mean score on the % pseudotime for each 1% (the mean of each regulons score is done on the cell having a pseudotime between i and i+1)
+    for(i in 2:nrow(newdata_second)){
+      cell_names_second[[1]] <- rownames(second_traj[0<=second_traj$percent_Pseudotime & second_traj$percent_Pseudotime<=1,])
+      cell_names_second[[i]] <- rownames(subset(second_traj, percent_Pseudotime>newdata_second[(i-1),1] & percent_Pseudotime<=newdata_second[i,1]))
+      regulons_score_percent_pseudotime_second[1,] <- apply(regulons_fit_second[rownames(second_traj[0<=second_traj$percent_Pseudotime & second_traj$percent_Pseudotime<=1,]),],2,mean) #  should use cell_names
+      regulons_score_percent_pseudotime_second[i,] <- apply(regulons_fit_second[rownames(subset(second_traj, percent_Pseudotime>newdata_second[(i-1),1] & percent_Pseudotime<=newdata_second[i,1])),], 2, mean)
+      
+      cell_names_first[[1]] <- rownames(first_traj[0<=first_traj$percent_Pseudotime & first_traj$percent_Pseudotime<=1,])
+      cell_names_first[[i]] <- rownames(subset(first_traj, percent_Pseudotime>newdata_first[(i-1),1] & percent_Pseudotime<=newdata_first[i,1]))
+      regulons_score_percent_pseudotime_first[1,] <- apply(regulons_fit_first[rownames(first_traj[0<=first_traj$percent_Pseudotime & first_traj$percent_Pseudotime<=1,]),],2,mean)
+      regulons_score_percent_pseudotime_first[i,] <- apply(regulons_fit_first[rownames(subset(first_traj, percent_Pseudotime>newdata_first[(i-1),1] & percent_Pseudotime<=newdata_first[i,1])),], 2, mean)
+    }
+    regulons_score_percent_pseudotime_second <- regulons_score_percent_pseudotime_second[which(rowSums(is.na(regulons_score_percent_pseudotime_second))!= ncol(regulons_score_percent_pseudotime_second)), ]
+    regulons_score_percent_pseudotime_first <- regulons_score_percent_pseudotime_first[which(rowSums(is.na(regulons_score_percent_pseudotime_first))!= ncol(regulons_score_percent_pseudotime_first)), ]
+    
+    regulons_score_percent_pseudotime_merge <- as.data.frame(rbind(regulons_score_percent_pseudotime_first[nrow(regulons_score_percent_pseudotime_first):1,],
+                                                                   regulons_score_percent_pseudotime_second), check.names =FALSE)
+    
+    
+    
+    
+    
+    
+    annot_list_merge <- data.frame(c(as.character(newdata_first[nrow(regulons_score_percent_pseudotime_first):1,2]), as.character(newdata_second[1:nrow(regulons_score_percent_pseudotime_second),2])))
+    colnames(annot_list_merge) <- "State"
+    rownames(annot_list_merge) <- rownames(regulons_score_percent_pseudotime_merge)
+    
+    
+    #Setting color panel for the heatmap
+    breaklist <- seq(-5,5, length.out=100)
+    
+    colnames(regulons_score_percent_pseudotime_merge) <- str_split_fixed(string = colnames(regulons_score_percent_pseudotime_merge),pattern = "\\(",n=2)[,1]
+    res <- list("score" = regulons_score_percent_pseudotime_merge,
+                "gap" = regulons_score_percent_pseudotime_first,
+                "annot" = annot_list_merge)
+    
+    return(res)
+  }
+  
+  regulons_score_percent_pseudotime_merge_young <- getMerge(metaData_monocle[which(metaData_monocle$AGE == "Young"),],breaklist)
+  rownames(regulons_score_percent_pseudotime_merge_young$score) <- paste0("young_",c(1:length(rownames(regulons_score_percent_pseudotime_merge_young$score))))
+  rownames(regulons_score_percent_pseudotime_merge_young$annot) <- paste0("young_",c(1:length(rownames(regulons_score_percent_pseudotime_merge_young$score))))
+  
+  
+  
+  regulons_score_percent_pseudotime_merge_old <- getMerge(metaData_monocle[which(metaData_monocle$AGE == "Old"),],breaklist)
+  rownames(regulons_score_percent_pseudotime_merge_old$score) <- paste0("old_",c(1:length(rownames(regulons_score_percent_pseudotime_merge_old$score))))
+  rownames(regulons_score_percent_pseudotime_merge_old$annot) <- paste0("old_",c(1:length(rownames(regulons_score_percent_pseudotime_merge_old$annot))))
+  
+  
+  regulons_score_percent_pseudotime_merge <- as.data.frame(rbind(regulons_score_percent_pseudotime_merge_young$score,
+                                                                 regulons_score_percent_pseudotime_merge_old$score), check.names =FALSE)
+  
+  scale_regulons_score_percent_pseudotime_merge <- data.frame(scale(regulons_score_percent_pseudotime_merge), check.names=FALSE)
+  
+  annot_list_merge <- rbind(regulons_score_percent_pseudotime_merge_young$annot,
+                            regulons_score_percent_pseudotime_merge_old$annot)
+  
+  
+  #Setting color legend for the different States
+  annot_color <- list()
+  
+  states <- unique(c(first_traj_states,second_traj_states))
+  
+  if (is.null(colorStates)) {
+    colorStates <- hue_pal()(length(levels(monocle$State)))[states]
+  }
+  c <- 1
+  
+  for (s in states) {
+    annot_color$State[[as.character(s)]] <- colorStates[c]
+    c <- c+1
+  }
+  
+  
+  gaps_col = c(nrow(regulons_score_percent_pseudotime_merge_young$gap),nrow(regulons_score_percent_pseudotime_merge_young$score),nrow(regulons_score_percent_pseudotime_merge_young$score) + nrow(regulons_score_percent_pseudotime_merge_old$gap))
+  
+  
+  
+  
+  if(clusterFirstData) {
+    youngDataOnly <- t(scale_regulons_score_percent_pseudotime_merge[which(startsWith(rownames(scale_regulons_score_percent_pseudotime_merge),"young")),])
+    
+    
+    ## hclust
+
+    hclustRes <- hclust( dist(youngDataOnly, method = "euclidian"),method = clustering_method)
+
+    
+    gapsLabelHclust <- cutree(hclustRes,k =cluster)
+    
+    gapsLabel <- gapsLabelHclust
+    regulonsYoungOrder <- rownames(youngDataOnly)[hclustRes$order]
+    
+    gaps <- gapsLabel[regulonsYoungOrder]
+    
+    gaps_row <- c()
+    
+    for (g in c(1:(length(gaps)-1))) {
+      if (gaps[g] != gaps[g+1]) {
+        gaps_row <- c(gaps_row,g)
+      }
+    }
+    
+    scale_regulons_score_percent_pseudotime_merge <- scale_regulons_score_percent_pseudotime_merge[,regulonsYoungOrder]
+    
+    
+    cutTreeRow = NA
+    
+    if (breaksAdapt) {
+      breaklist_2 <- breaklist
+      breaklist_2[length(breaklist)] <- max(max(scale_regulons_score_percent_pseudotime_merge),max(breaklist))
+      breaklist_2[1] <- min(min(scale_regulons_score_percent_pseudotime_merge), min(breaklist))
+    } else {
+      breaklist_2 <- NA
+    }
+    heatmap <- pheatmap(t(scale_regulons_score_percent_pseudotime_merge),
+                        show_colnames = FALSE,
+                        cluster_cols = FALSE,
+                        gaps_col = gaps_col,
+                        color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(length(breaklist)),
+                        annotation_col = annot_list_merge,
+                        annotation_colors = annot_color,
+                        legend=legend,
+                        annotation_legend=legend,
+                        cluster_rows = FALSE,
+                        cutree_rows = cutTreeRow,
+                        gaps_row = gaps_row,
+                        breaks = breaklist_2,
+                        na_col = "black",
+                        legend_breaks = legend_breaks)
+    
+    
+    
+  } else {
+    
+    cutTreeRow = cluster
+    
+    if (breaksAdapt) {
+      breaklist_2 <- breaklist
+      breaklist_2[length(breaklist)] <- max(max(scale_regulons_score_percent_pseudotime_merge),max(breaklist))
+      breaklist_2[1] <- min(min(scale_regulons_score_percent_pseudotime_merge), min(breaklist))
+    } else {
+      breaklist_2 <- NA
+    }
+    
+    heatmap <- pheatmap(t(scale_regulons_score_percent_pseudotime_merge),
+                        show_colnames = FALSE,
+                        cluster_cols = FALSE,
+                        gaps_col = gaps_col,
+                        color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(length(breaklist)),
+                        annotation_col = annot_list_merge,
+                        annotation_colors = annot_color,
+                        legend=legend,
+                        annotation_legend=legend,
+                        cluster_rows = clusterRow,
+                        cutree_rows = cutTreeRow,
+                        clustering_method = clustering_method,
+                        breaks = breaklist_2,
+                        na_col = "black",
+                        legend_breaks = legend_breaks)
+    
+    
+    
+  }
+  return(heatmap) 
+} 
+
+
+
diff --git a/R_src/makeRegulonTableCL.R b/R_src/makeRegulonTableCL.R
new file mode 100644
index 0000000000000000000000000000000000000000..083bd934fbd9b84c1eb3ddbc8dc59fe9885f7eca
--- /dev/null
+++ b/R_src/makeRegulonTableCL.R
@@ -0,0 +1,156 @@
+##------------------------------------------------------------------------------
+## L. Herault
+##------------------------------------------------------------------------------
+
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+
+suppressMessages(library(getopt))
+suppressMessages(library(stringr))
+suppressMessages(library(plyr))
+
+
+# Analysis of seurat 3 integration and clusternig workflow
+
+## -----------------------------------------------------------------------------
+## Command line args
+## -----------------------------------------------------------------------------
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)',
+  "tfTested",         't',1, "character", "TF tested list (eg TF with a motif in the scenic database",
+  "regulonJson",       'r',1, "character", "Regulon main Json file",
+  "regulonJsonSupp", "s", 1, "character", "Regulon supp Json files (separated by +)",
+  "subConditionName", "n", 1, "character", "correspondong names (separated by +)"
+  
+), byrow=TRUE, ncol=5);
+
+
+
+opt = getopt(spec)
+
+
+# if help was asked, print a friendly message
+# and exit with a non-zero error code
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$regulonJson)) {
+  cat("Create regulons tabme")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+
+if (is.null(opt$logfc_threshold)) {
+  opt$logfc_threshold = 0.25
+} 
+if (is.null(opt$norm_method)) {
+  opt$norm_method = "logNorm"
+} 
+
+# Printing option before running
+
+for (o in names(opt)) {
+  print(paste(o,":", opt[[o]]))
+}
+
+# For testing
+# setwd("/shared/projects/scRNA_HSPC_Aging/scRNA_infer/")
+# 
+# opt <- list()
+# opt$tfTested <- "output/publicData/mm_mgi_tfs.txt"
+# 
+# opt$regulonJson <- "output/ScenicRNA_multipleRuns/cis_target_maskDropouts/aggregatedRegulonsMetaTest.json"
+# 
+# opt$regulonJsonSupp <- "output/ScenicRNA_multipleRuns_young/cis_target_maskDropouts/aggregatedRegulonsMeta.json+output/ScenicRNA_multipleRuns_old/cis_target_maskDropouts/aggregatedRegulonsMeta.json"
+# opt$subConditionName <- "young+old"
+# opt$outdir <- 'output/regulonAnalysis/'
+
+dir.create(opt$outdir,showWarnings = F,recursive = T)
+
+jsonSupp <- strsplit(opt$regulonJsonSupp,split = "\\+")[[1]]
+subConditionName <- strsplit(opt$subConditionName,split = "\\+")[[1]]
+
+suppData <- cbind(jsonSupp,subConditionName)
+
+###
+
+
+tf_mouse <- read.table(opt$tfTested)
+length(tf_mouse$V1)
+
+tf_tested <- tf_mouse$V1
+
+
+# Interaction table regulons and their target genes
+
+
+makeTable <- function(regulonJsonAndName) {
+  
+  regulonJsonAndName <- unlist(regulonJsonAndName)
+  regulonJson <- regulonJsonAndName[1]
+  if(!is.na(regulonJsonAndName[2])) {
+    suffixCol <- paste0("_",regulonJsonAndName[2])
+  } else {
+    suffixCol <- ""
+  }
+  print(suffixCol)
+  
+  regulons <- RJSONIO::fromJSON(regulonJson)
+  
+  regulonTable<- matrix(ncol = 6)
+  colnames(regulonTable) <- c("regulon","gene","mor",
+                              paste0("recoveredTimes",suffixCol),
+                              paste0("importanceMean",suffixCol),
+                              paste0("importanceSd",suffixCol))
+  
+  
+  for (i in names(regulons)) {
+    mor = 1 
+    if (endsWith(i,"(-)")) {
+      mor <- -1
+    }
+    for (t in names(regulons[[i]])) {
+      importanceMean <- NA
+      importanceSd <- NA
+      tf <- str_split_fixed(i,"\\(",n=2)[,1]
+      if(tf != t) {
+        importanceMean <- regulons[[i]][[t]][[2]]
+        importanceSd <- regulons[[i]][[t]][[3]]
+      }
+      add <- unlist(c(tf,t,mor,regulons[[i]][[t]][[1]][1],importanceMean,importanceSd))
+      regulonTable<- rbind(regulonTable,add)
+    }
+  }
+  regulonTable <- data.frame(regulonTable[-1,]) ## first line is NA
+  regulonTable$interaction <- paste(regulonTable$regulon,regulonTable$gene,regulonTable$mor,sep = "_") 
+  rownames(regulonTable) <- regulonTable$interaction
+  return(regulonTable)
+}
+
+
+mainRegulonTable <- makeTable(regulonJsonAndName = c(opt$regulonJson, NULL))
+
+if (!is.null(opt$subConditionName)) {
+  conditions <- mapply(list, jsonSupp, subConditionName, SIMPLIFY=F)
+  suppRegulonTables <- lapply(conditions, makeTable)
+  ## write the supp tables
+  for (c in (1:length(conditions))) {
+  write.table(suppRegulonTables[[c]],paste0(opt$outdir,"/",conditions[[c]][[2]],"RegulonTable.tsv"),sep = '\t')
+  }
+}
+
+#Add interaction recovered in one or two condition in the main table
+
+for (table in suppRegulonTables) {
+  mainRegulonTable <- cbind(mainRegulonTable,table[rownames(mainRegulonTable),c(4:6)])
+}
+
+## Write the tables
+write.table(mainRegulonTable,paste0(opt$outdir,"/mainRegulonTable.tsv"),sep = '\t',quote = F,row.names = F)
+
+
+
+
diff --git a/R_src/newMergingATAC_temp.R b/R_src/newMergingATAC_temp.R
new file mode 100644
index 0000000000000000000000000000000000000000..299336fd7f0ccef107fca5ed5655243dd5747fc7
--- /dev/null
+++ b/R_src/newMergingATAC_temp.R
@@ -0,0 +1,156 @@
+suppressMessages(library(Signac))
+suppressMessages(library(Seurat))
+suppressMessages(library(getopt))
+suppressMessages(library(gridExtra))
+suppressMessages(library(scales))
+suppressMessages(library(grid))
+suppressMessages(library(S4Vectors))
+suppressMessages(library(patchwork))
+suppressMessages(library(ggplot2))
+
+# set.seed(2021)
+# dir.create("/shared/projects/scRNA_HSPC_Aging/GMP_PRARA/output/ATAC/integration_v2/", recursive = T)
+outdir <- "/shared/projects/scRNA_HSPC_Aging/GMP_PRARA/output/ATAC/integration_v2/"
+
+atac_CT <- readRDS("/shared/projects/scRNA_HSPC_Aging/GMP_PRARA/output/ATAC/smp/Ctrl/atac.rds")
+atac_RA <- readRDS("/shared/projects/scRNA_HSPC_Aging/GMP_PRARA/output/ATAC/smp/RA/atac.rds")
+
+granges_CT <- granges(atac_CT)
+granges_RA <- granges(atac_RA)
+
+combined.peaks <- reduce(x = c(granges_CT, granges_RA))
+
+fragment_CT <- Fragments(atac_CT)
+fragment_RA <- Fragments(atac_RA)
+
+counts_CT <- FeatureMatrix(
+  fragments = fragment_CT,
+  features = combined.peaks,
+  cells = rownames(atac_CT@meta.data)
+)
+
+counts_RA <- FeatureMatrix(
+  fragments = fragment_RA,
+  features = combined.peaks,
+  cells = rownames(atac_RA@meta.data)
+)
+
+
+# Create the objects 
+## Chromatin then seurat
+CT_assay <- CreateChromatinAssay(counts = counts_CT, fragments = fragment_CT)
+RA_assay <- CreateChromatinAssay(counts = counts_RA, fragments = fragment_RA)
+
+CT_seurat <- CreateSeuratObject(CT_assay, assay = "peaks", meta.data = atac_CT@meta.data)
+RA_seurat <- CreateSeuratObject(RA_assay, assay = "peaks", meta.data = atac_RA@meta.data)
+
+CT_seurat$condition <- "Control"
+RA_seurat$condition <- "Treated"
+
+
+intersecting.regions <- findOverlaps(query = CT_seurat, subject = RA_seurat)
+intersections.2 <- unique(queryHits(intersecting.regions))
+# choose a subset of intersecting peaks
+peaks.use <- sort(granges(CT_seurat)[sample(intersections.2, size = 10000, replace = FALSE)])
+
+peaks <- FeatureMatrix(
+  fragments = Fragments(RA_seurat),
+  features = peaks.use,
+  cells = colnames(RA_seurat)
+)
+
+# create a new assay and add it to the second sample dataset
+RA_seurat[['CTSamplePeaks']] <- CreateChromatinAssay(
+  counts = peaks,
+  min.cells = 1,
+  ranges = peaks.use,
+  genome = 'mm10'
+)
+
+DefaultAssay(RA_seurat) <- 'CTSamplePeaks'
+RA_seurat <- RunTFIDF(RA_seurat)  ##normalisation
+
+### Integration
+# create a new assay in the first sample-ATAC-seq dataset containing the common peaks
+peaknames <- GRangesToString(grange = peaks.use)
+
+CT_seurat[['CTSamplePeaks']] <- CreateChromatinAssay(
+  counts <- GetAssayData(CT_seurat, assay = "peaks", slot = "counts")[peaknames, ],
+  ranges = peaks.use,
+  genome = "mm10"
+)
+
+# run TF-IDF for the new assay
+DefaultAssay(CT_seurat) <- "CTSamplePeaks"
+CT_seurat <- RunTFIDF(CT_seurat)
+ 
+# CT_seurat <- RenameCells(CT_seurat[["peaks"]],
+#                          new.names = paste0(colnames(CT_seurat[["peaks"]], "_CT")))
+# CT_seurat <- RunTFIDF(CT_seurat)
+# RA_seurat <- RunTFIDF(RA_seurat)
+print("Find Anchors")
+anchors <- FindIntegrationAnchors(
+  object.list = list(CT_seurat, RA_seurat),
+  anchor.features = rownames(CT_seurat),
+  assay = c('CTSamplePeaks', 'CTSamplePeaks'),
+  k.filter = NA
+)
+
+# integrate data and create a new merged object
+print("Integrate data")
+integrated <- IntegrateData(
+  anchorset = anchors,
+  dims = 2:30,
+  preserve.order = TRUE
+)
+
+print("RunSVD")
+integrated <- RunSVD(
+  object = integrated,
+  n = 30,
+  reduction.name = 'integratedLSI'
+)
+
+print("RunUMAP")
+integrated <- RunUMAP(
+  object = integrated,
+  dims = 2:30,
+  reduction = 'integratedLSI'
+)
+
+print("Save RDS")
+saveRDS(integrated, paste0(outdir, "atac_merged_integrated.rds"))
+
+UMAP_newInt <- DimPlot(integrated, group.by = "condition")
+
+
+png(paste0(outdir,"UMAP_merge_integrated_condition.png"),units = "cm",height = 15,width=15,res = 300)
+UMAP_newInt
+dev.off()
+
+# combined_seurat <- merge(
+#   x = CT_seurat,
+#   y = RA_seurat,
+#   add.cell.ids = c("CT", "RA")
+# )
+# 
+# combined_seurat <- RunTFIDF(combined_seurat)
+# combined_seurat <- FindTopFeatures(combined_seurat, min.cutoff = 'q75')
+# combined_seurat <- RunSVD(combined_seurat, n = 40)
+# 
+# png(paste0(outdir,"DepthCor.png"),units = "in",height = 6,width=6,res = 300)
+# DepthCor(combined_seurat)
+# dev.off()
+# 
+# combined_seurat <- RunUMAP(combined_seurat, dims = 1:30, reduction = "lsi")
+# 
+# saveRDS(combined_seurat, paste0(outdir, "atac_integrated.rds"))
+
+# atac_int <- RunUMAP(atac_int, dims = 2:30, reduction = "lsi")
+# 
+# UMAP_newInt <- DimPlot(atac_int, group.by = "condition")
+# 
+# 
+# png(paste0(outdir,"UMAP_condition.png"),units = "cm",height = 15,width=15,res = 300)
+# UMAP_newInt
+# dev.off()
diff --git a/R_src/orderingSeurat3IntegratedMonocleCL.R b/R_src/orderingSeurat3IntegratedMonocleCL.R
new file mode 100644
index 0000000000000000000000000000000000000000..b44f6eecc0b5da6c45b392cdbbe6ddf2f3c4dc55
--- /dev/null
+++ b/R_src/orderingSeurat3IntegratedMonocleCL.R
@@ -0,0 +1,314 @@
+##------------------------------------------------------------------------------
+## L. Herault
+##------------------------------------------------------------------------------
+
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+
+suppressMessages(library(Seurat))
+suppressMessages(library(monocle))
+suppressMessages(library(gridExtra))
+suppressMessages(library(RColorBrewer))
+suppressMessages(library(getopt))
+
+# Monocle 2 pseudotime ordering with seurat intgrated and scaled data
+
+## -----------------------------------------------------------------------------
+## Command line args
+## -----------------------------------------------------------------------------
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'inputSeurat',  'i', 1, "character", "REQUIRED: 10X data prepared as seurat object with clustering results (.RDS generated by prepare_data.R).",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)',
+  "num_comp",      'n',1,"numeric", "Number of dimension in the trajectory space (default 2)",
+  "signatureFile",  's',1, "character", "Signature rds file path",
+  "clustCol",       "k", 1, "character", "cluster column in the seurat meta.data to plot them in the trajectory.\n Also used to identify the cluster to remove if provided see below",
+  "cluster", "r", 1, "character", "exclude a cluster from the pseudotime ordering.\n Cluster column in the seurat meta.data need to be provided see below",
+  'removeClusterWithSignature', "d", 1, "character", "Name of a signature. Exclude the cluster with the highest enrichment for the given signature. \nCluster signature enrichment table need to be provided see below",
+  'signatureEnrichmentTable', 't', 1, "character", "Path to the table with the signature enrichment per cluster for -d option",
+  "startingClust", "c", 1, "character", "Starting clust for ordering cells",
+  "startingCt", "e", 1, "character", "Starting cell type for ordering cells"),
+byrow=TRUE, ncol=5)
+
+
+opt = getopt(spec)
+
+# if help was asked, print a friendly message
+# and exit with a non-zero error code
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputSeurat)) {
+  cat("Monocle 2 pseudotime ordering with seurat intgrated and scaled data")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+#set default arguments values
+
+if (is.null(opt$num_comp)) {
+  opt$num_comp <- 2
+}
+
+if (is.null(opt$outdir)) {
+  opt$outdir = "./"
+}
+
+dir.create(opt$outdir,recursive = T,showWarnings = F)
+
+seurat <- readRDS(opt$inputSeurat)
+
+print(opt)
+
+seurat$numclust <- seurat@meta.data[,opt$clustCol]
+
+if (!is.null(opt$cluster)) {
+  print(paste("removing cluster",opt$cluster,"in seurat metadata column",opt$clusCol,"before ordering..."))
+  Idents(seurat) <- opt$clustCol
+  seurat = subset(seurat, idents = opt$cluster,invert = TRUE)
+  
+}
+
+if (!is.null(opt$removeClusterWithSignature)) {
+  if (!is.null(opt$signatureEnrichmentTable)) {
+    sigClustTable <- read.table(opt$signatureEnrichmentTable)
+    print(head(sigClustTable))
+    clustToRemove <- rownames(sigClustTable)[which.min(sigClustTable[,opt$removeClusterWithSignature])]
+    clustToRemove <- strsplit(clustToRemove,split="_")[[1]][2]
+    
+    print(paste("removing cluster",clustToRemove,"in seurat metadata column",opt$clusCol,"before ordering"))
+    Idents(seurat) <- opt$clustCol
+    seurat = subset(seurat, idents = clustToRemove,invert = TRUE)
+    
+  } else {
+    print("You need to provide a table with the selected signature score enrichment per cluster")
+    q(status=1)
+  }
+}
+
+DefaultAssay(seurat) <- "integrated"
+pd <- new("AnnotatedDataFrame", data = seurat@meta.data)
+fd <- new("AnnotatedDataFrame", data = data.frame(gene_short_name = rownames(seurat)))
+rownames(fd) <- fd$gene_short_name
+
+monocle <- newCellDataSet(GetAssayData(object = seurat, slot = "scale.data")[,rownames(seurat@meta.data)],
+                          phenoData = pd,
+                          featureData = fd,
+                          lowerDetectionLimit = 0.1,
+                          expressionFamily = uninormal())
+
+fData(monocle)$use_for_ordering <- TRUE
+
+
+print("Reducing dimension by DDRTree...")
+
+monocle <- reduceDimension(monocle,
+                           max_components = opt$num_comp,
+                           reduction_method = 'DDRTree',
+                           norm_method = "none",
+                           pseudo_expr = 0,
+                           verbose = T )
+
+print("Ordering cells...")
+
+monocle <- orderCells(monocle)
+saveRDS(monocle,paste(opt$outdir,"/monocleRamdomStart.rds",sep =""))
+
+
+GM_state_predicted <- function(cds,cellType){
+  if (length(unique(pData(cds)$State)) > 1){
+    
+    #Preselect terminal states
+    tip_leaves <- names(which(igraph::degree(minSpanningTree(cds)) == 1))
+    tip_leaves <- sub(x=tip_leaves,pattern="Y_",replacement="")
+    print(tip_leaves)
+    pData(cds)$potentialRootCells <- cds@auxOrderingData$DDRTree$pr_graph_cell_proj_closest_vertex %in% tip_leaves
+    terminal_states <- unique(pData(cds)$State[which(pData(cds)$potentialRootCells)])
+    LT_counts <- table(pData(cds)$State, pData(cds)$predicted)[,cellType]
+    print(LT_counts)
+    LT_prop <- LT_counts/rowSums(table(pData(cds)$State, pData(cds)$predicted))
+    print(LT_prop)
+    LT_propF <- LT_prop[which(labels(LT_prop) %in% terminal_states)]
+    print("LT proportion for terminal states")
+    print(LT_propF)
+    print("Initial state:")
+    #print(which(LT_propF == max(LT_propF,na.rm = T)))
+    Istate <- as.numeric(names(LT_propF)[which(LT_propF == max(LT_propF,na.rm = T))])
+    print(Istate)
+    return(Istate)
+  } else {
+    return (1)
+  }
+}
+
+
+
+GM_state_numclust <- function(cds,num){
+  if (length(unique(pData(cds)$State)) > 1){
+    
+    #Preselect terminal states
+    tip_leaves <- names(which(igraph::degree(minSpanningTree(cds)) == 1))
+    tip_leaves <- sub(x=tip_leaves,pattern="Y_",replacement="")
+    print(tip_leaves)
+    pData(cds)$potentialRootCells <- cds@auxOrderingData$DDRTree$pr_graph_cell_proj_closest_vertex %in% tip_leaves
+    terminal_states <- unique(pData(cds)$State[which(pData(cds)$potentialRootCells)])
+    S_counts <- table(pData(cds)$State, pData(cds)$numclust)[,num]
+    print(S_counts)
+    S_prop <- S_counts/rowSums(table(pData(cds)$State, pData(cds)$numclust))
+    print(S_prop)
+    S_propF <- S_prop[which(labels(S_prop) %in% terminal_states)]
+    print("S proportion for terminal states")
+    print(S_propF)
+    print("Initial state:")
+    #print(which(S_propF == max(S_propF,na.rm = T)))
+    Istate <- as.numeric(names(S_propF)[which(S_propF == max(S_propF,na.rm = T))])
+    print(Istate)
+    return(Istate)
+  } else {
+    return (1)
+  }
+}
+
+
+
+if (!is.null(pData(monocle)$predicted) & !is.null(opt$startingCt)) {
+  monocle <- orderCells(monocle,root_state = GM_state_predicted(monocle,cellType = opt$cellType))
+} else {
+  print(paste0("ordering cells with cluster ", opt$startingClust, " proportion"))
+  monocle <- orderCells(monocle,root_state = GM_state_numclust(monocle,num = opt$startingClust))
+}
+
+
+
+
+png(paste(opt$outdir,"/cellTrajectoryStates.png",sep =""))
+plot_cell_trajectory(monocle)
+dev.off()
+
+png(paste(opt$outdir,"/cellTrajectoryPhases.png",sep =""))
+plot_cell_trajectory(monocle,color_by = "phases")
+dev.off()
+
+
+png(paste(opt$outdir,"/sampleName.png",sep = ""))
+plot_cell_trajectory(monocle,color_by = "sampleName")
+dev.off()
+
+png(paste(opt$outdir,"/age.png",sep = ""))
+plot_cell_trajectory(monocle,color_by = "AGE")
+dev.off()
+
+
+if (!is.null(pData(monocle)$predicted)) {
+png(paste(opt$outdir,"/cellTrajectoryCellTypePredicted.png",sep =""))
+plot_cell_trajectory(monocle,color_by = "predicted")
+dev.off()
+}
+
+png(paste(opt$outdir,"/cellTrajectorySeuratClusters.png",sep = ""))
+plot_cell_trajectory(monocle,color_by=opt$clustCol)
+dev.off()
+
+
+
+signatures <- readRDS(opt$signatureFile)
+
+dir.create(paste(opt$outdir,"/localizeSeuratCluster",sep =""))
+dir.create(paste(opt$outdir,"/cellSignatures",sep =""))
+
+sigName <- names(signatures)
+
+for (sig in sigName) {
+  print(sig)
+  png(paste(opt$outdir,"/cellSignatures/",sig,".png",sep = ""))
+  print(plot_cell_trajectory(monocle,color_by = sig) + scale_color_gradient(low = "yellow",high = "red"))
+  dev.off()
+}
+
+for (c in unique(pData(monocle)[,opt$clustCol])) {
+  pData(monocle)$clustOfInterest <- FALSE
+  pData(monocle)[which(pData(monocle)[,opt$clustCol] == c),"clustOfInterest"] <- TRUE
+  png(paste(opt$outdir,"/localizeSeuratCluster/cluster",c,".png",sep=""))
+  print(plot_cell_trajectory(monocle,color_by="clustOfInterest"))
+  dev.off()
+}
+
+
+#Save monocle object
+saveRDS(monocle,paste(opt$outdir,"/monocle.rds",sep =""))
+
+#add state monocle info to seurat metadata
+if (unique(rownames(pData(monocle)) == rownames(seurat@meta.data))) {
+  seurat@meta.data$State_monocle <- pData(monocle)$State
+}
+
+png(paste(opt$outdir,"/tSNE_DDRTree_State.png",sep =""),width = 1600,height = 800)
+grid.arrange(plot_cell_trajectory(monocle),DimPlot(object = seurat,group.by = "State_monocle"), ncol =2)
+dev.off()
+
+png(paste(opt$outdir,"/tSNE_DDRTree_samples.png",sep =""),width = 1600,height = 800)
+grid.arrange(plot_cell_trajectory(monocle,group_by = "sampleName"),DimPlot(object = seurat,group.by = "sampleName"), ncol =2)
+dev.off()
+
+p1 <-  plot_cell_trajectory(monocle,color_by= opt$clustCol)
+p2 <- DimPlot(object = seurat,group.by = opt$clustCol) + theme(legend.position="none")
+
+png(paste(opt$outdir,"/tSNE_DDRTree_Clusters.png",sep =""),width = 1600,height = 800)
+grid.arrange(p1,p2, ncol =2)
+dev.off()
+
+#Some interesting genes 
+
+# png(paste(opt$outdir,"/geneOfInterest.png",sep =""))
+# plot_genes_in_pseudotime(monocle[c("Cd34","Procr","Ly6a","Cd48")])
+# dev.off()
+
+
+p3 <-  plot_cell_trajectory(monocle)
+p4 <- DimPlot(object = seurat,group.by = "State_monocle")
+
+
+png(paste(opt$outdir,"/tSNE_DDRTree_State.png",sep =""),width = 1600,height = 800)
+grid.arrange(p3,p4, ncol =2)
+dev.off()
+
+
+
+p11 <-  plot_cell_trajectory(monocle,color_by= opt$clustCol) + theme(legend.position="none")
+p22 <- DimPlot(object = seurat,group.by = opt$clustCol) + theme(legend.position="none")
+p33 <- FeaturePlot(object = seurat,features = "Total_mRNAs") 
+p44 <-  plot_cell_trajectory(monocle,color_by= "phases") 
+
+
+png(paste(opt$outdir,"/tSNE_DDRTree_All_fig.png",sep =""),width = 1600,height = 1600)
+grid.arrange(p33,p22,p44,p11, ncol =2)
+dev.off()
+
+saveRDS(seurat,paste(opt$outdir,"/seurat_with_monocleStates.rds",sep =""))
+
+
+monocleRaw <- monocle
+
+DefaultAssay(seurat) <- "RNA"
+
+monocleRaw@assayData$exprs <- GetAssayData(seurat,slot = "counts")
+fData(monocleRaw) <- data.frame(gene_short_name = rownames(seurat))
+rownames(fData(monocleRaw)) <- fData(monocleRaw)$gene_short_name
+monocleRaw@expressionFamily <- negbinomial.size()
+
+monocleRaw <- estimateSizeFactors(monocleRaw)
+monocleRaw <- estimateDispersions(monocleRaw)
+
+#png(paste(opt$outdir,"/geneOfInterestwithCounts.png",sep =""))
+#plot_genes_in_pseudotime(monocleRaw[c("Cd34","E2f1","Cd48")])
+#dev.off()
+
+
+#png(paste(opt$outdir,"/geneOfInterestBranchedwithCounts.png",sep =""))
+#plot_genes_branched_pseudotime(monocleRaw[c("Cd34","E2f1","Cd48")])
+#dev.off()
+
+saveRDS(monocleRaw,paste(opt$outdir,"/monocleWithCounts.rds",sep =""))
+
+
diff --git a/R_src/prepareAUCseuratEnhancePromoseuratCL.R b/R_src/prepareAUCseuratEnhancePromoseuratCL.R
new file mode 100644
index 0000000000000000000000000000000000000000..b6bf5dac9233e3d55ffb578478b0889f100510c7
--- /dev/null
+++ b/R_src/prepareAUCseuratEnhancePromoseuratCL.R
@@ -0,0 +1,131 @@
+#-----------------------------------------------------#
+#  Add promoter and enhancer atac + Regulon assay on RNA
+#-----------------------------------------------------#
+
+suppressMessages(library(getopt))
+suppressMessages(library(Signac))
+suppressMessages(library(Seurat))
+
+# library(patchwork)
+
+
+featureDiff <- function(seurat,cells.1,cells.2,feature) {
+  data <- GetAssayData(seurat,slot = "data")
+  total.diff <- mean(data[feature,cells.1]) - mean(data[feature,cells.2])
+  return(total.diff)
+} 
+
+
+##-------------------------------------------------------------------------##
+## Option list
+##-------------------------------------------------------------------------##
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'inputAtacRDS',  'a', 1, "character", "REQUIRED: 10X data prepared as seurat/signac object.",
+  'inputRNARDS', 'i', 1, "character", "REQUIRED: 10X data prepared as seurat object.",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)',
+  'regulonsAUC', 'r',1, "character", "Regulons AUC file"
+), byrow=TRUE, ncol=5);
+
+opt = getopt(spec)
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputAtacRDS)) {
+  cat("Create influence graph from regulon table")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+seurat_ATAC <- readRDS(opt$inputAtacRDS)
+DefaultAssay(seurat_ATAC) <- "peaks"
+
+seurat_RNA <- readRDS(opt$inputRNARDS)
+
+regulons.AUC <- read.table(opt$regulonsAUC, header = T, sep = ",", check.names = F, row.names = 1)
+regulons.AUC <- regulons.AUC[,colnames(regulons.AUC)[which(endsWith(colnames(regulons.AUC),suffix = "+)"))]]
+
+outdir <- opt$outdir
+# dir.create(outdir, recursive = T)
+
+#-----------------------------------------------------#
+#         Create promoter and enhancer assay
+#-----------------------------------------------------#
+print("Starting promoter activity")
+# Promoter +-3kbp from TSS
+gene.act.promotor <- GeneActivity(seurat_ATAC, extend.upstream = 3000, extend.downstream = 3000)
+
+seurat_ATAC[["promoter"]] <- CreateAssayObject(counts = gene.act.promotor)
+
+seurat_ATAC <- NormalizeData(
+  object = seurat_ATAC,
+  assay = "promoter",
+  normalization.method = "LogNormalize",
+  scale.factor = median(seurat_ATAC$nCount_promoter)
+)
+
+
+# Enhancer +-50kb minus +-3kb from TSS
+print("Starting enhancer activity")
+gene.act.enhancer <- GeneActivity(seurat_ATAC, extend.upstream = 50000, extend.downstream = 50000)
+
+gene.act.enhancer.final <- gene.act.enhancer - gene.act.promotor
+
+seurat_ATAC[["enhancer"]] <- CreateAssayObject(counts = gene.act.enhancer.final)
+
+seurat_ATAC <- NormalizeData(
+  object = seurat_ATAC,
+  assay = "enhancer",
+  normalization.method = "LogNormalize",
+  scale.factor = median(seurat_ATAC$nCount_enhancer)
+)
+print("Savin seurat file annotated")
+saveRDS(seurat_ATAC, file = paste0(outdir, "/TransferRnaAtac_04/SignacMotif_2/", "ATAC_int_enhancerPromoter.rds"))
+
+#-----------------------------------------------------#
+#       Add AUCell assay to our seurat object
+#-----------------------------------------------------#
+
+print("Start adding AUCell Assay")
+auCell_data <- t(regulons.AUC)
+
+seurat_RNA[["AUCell"]] <- CreateAssayObject(data=auCell_data)
+
+
+#find cluster regulon markers
+Idents(seurat_RNA) <- "FinalCluster"
+
+#Set AUCell slot 
+DefaultAssay(seurat_RNA) <- "AUCell"
+
+
+clusterRegulon <- FindAllMarkers(seurat_RNA,
+                                 only.pos = T,
+                                 logfc.threshold= 0,
+                                 pseudocount.use = 1,
+                                 min.pct = 0.1)
+
+# Compute true mean difference in score because Seurat comput only logFC
+# The featureDiff functions is loaded from ../R_src/computeDiffFun.R file
+
+clusterRegulon$avg_diff <- NA
+
+for (rm in rownames(clusterRegulon)) {
+  feature <- clusterRegulon[rm,"gene"]
+  cells.1 <- colnames(seurat_RNA)[which(seurat_RNA$FinalCluster == clusterRegulon[rm,"cluster"])]
+  cells.2 <- colnames(seurat_RNA)[which(seurat_RNA$FinalCluster != clusterRegulon[rm,"cluster"])]
+  clusterRegulon[rm,"avg_diff"] <- featureDiff(seurat_RNA,cells.1,cells.2,feature)
+}
+
+# cut off on p adjusted value
+clusterRegulon <- clusterRegulon[which(clusterRegulon$p_val_adj < 0.05 & clusterRegulon$avg_diff > 0.002),]
+
+
+# rename some columns
+colnames(clusterRegulon) <- c("p_val","avg_logFC","pct.1","pct.2","p_val_adj","cluster","regulon","avg_diff")
+
+# order columns
+clusterRegulon <- clusterRegulon[,c("p_val","pct.1","pct.2","p_val_adj","cluster","regulon","avg_diff")]
+
+saveRDS(seurat_RNA, file = paste0(outdir, "RNA/regulonAnalysis/", "seuratAUC.rds"))
+write.table(clusterRegulon, paste0(outdir, "RNA/regulonAnalysis/", "regulonMarkersCluster.tsv"), sep = "\t", quote = F, row.names = T, col.names = T)
diff --git a/R_src/prepareCL.R b/R_src/prepareCL.R
new file mode 100644
index 0000000000000000000000000000000000000000..9af18a9ec67e42d1f30f0a280c6e97497b611b5f
--- /dev/null
+++ b/R_src/prepareCL.R
@@ -0,0 +1,121 @@
+##------------------------------------------------------------------------------
+## L. Herault
+##------------------------------------------------------------------------------
+
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+
+#Load required packages
+suppressMessages(library(monocle))
+suppressMessages(library(scran))
+suppressMessages(library(getopt))
+
+
+
+#load homemade function
+source("R_src/data_preparation.R")
+
+
+
+# Quality control and preparation of 10X data for analysis
+
+## -----------------------------------------------------------------------------
+## Command line args
+## -----------------------------------------------------------------------------
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'inputRDS',  'i', 1, "character", "REQUIRED: 10X data prepared as monocle object (.RDS generated by prepare_data.R).",
+  "mitochFilter", "m",1, "numeric", "Ribosomal protein gene RNA proportion filtering (eg 0.1 to remove cells with more than 10% of Rbp transcripts",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)'
+), byrow=TRUE, ncol=5);
+
+opt = getopt(spec)
+
+# if help was asked, print a friendly message
+# and exit with a non-zero error code
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputRDS)) {
+  cat("Add cell cycle phases to each cells with scran, filter cells of low quality, control percentage mitochondrial transcripts")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+if (is.null(opt$outdir)) {
+  opt$outdir = "./"
+}
+
+
+gbm_cds <- readRDS(opt$inputRDS)
+
+
+# print some stuff 
+# head of the expression matrix
+exprs(gbm_cds[c(1:5),c(1:5)])
+
+# cells information 
+head(pData(gbm_cds))
+
+# gene information 
+head(fData(gbm_cds))
+
+# Estimating size factor and dispersion not needed anymore
+#gbm_cds <- estimateSizeFactors(gbm_cds)
+#gbm_cds <- estimateDispersions(gbm_cds)
+
+
+# First getting cell cycle phases with cyclone function from package scrann (see data_preparation.R))
+gbm_cds <- getCellCyclePhases(gbm_cds,outdir = paste(opt$outdir,"/cellCycle",sep =""))
+
+# Filtering cells on UMI counts as explained in monocle doc 
+# print num cells before filtering
+
+print("Matrix dim before cell filtering :")
+dim(pData(gbm_cds))
+
+gbm_cds <- filterCells(gbm_cds,paste(opt$outdir,"/cellsFiltering",sep = ""),propMitochFilter = opt$mitochFilter)
+
+# Filter on mitochondrial RNA proportion if specified
+if (!is.null(opt$mitochFilter)) {
+  valid_cells_Mito <- row.names(subset(pData(gbm_cds),
+                                       percentMito < opt$mitochFilter))
+  
+  nonValidCellMito <- length(row.names(pData(gbm_cds)))-length(valid_cells_Mito)
+  
+  
+}
+
+# Filter cells with to few expressed genes to assign a cell cycle phases with cyclone
+# not the the case with cellranger2 workflow
+
+valid_cells <- row.names(subset(pData(gbm_cds),
+            is.na(G2M_score) == FALSE &
+            is.na(S_score) == FALSE &
+            is.na(G1_score) == FALSE &
+            is.na(phases) == FALSE ))
+
+nonValidCellNum <- length(row.names(pData(gbm_cds)))-length(valid_cells)
+
+print(paste(nonValidCellNum,"cells were filtered out because they express to few genes to assign a cell cycle phase with cyclone to them. "))
+
+# Define final valid cells
+if (!is.null(opt$mitochFilter)) {
+  print(paste(nonValidCellMito,"cells were filtered out because more than", opt$mitochFilter, "of their transcripts are ribosomal protein gene RNA"))
+  
+  valid_cells <- valid_cells[valid_cells %in% valid_cells_Mito]
+  }
+
+
+#cells are row in pData(monocle) but column in monocle object
+gbm_cds <- gbm_cds[,valid_cells]
+
+
+#After
+
+print("Matrix dim after cell filtering :")
+dim(pData(gbm_cds))
+
+
+saveRDS(object = gbm_cds,file = paste(opt$outdir,"/gbm_cds_treated.rds",sep =""))
diff --git a/R_src/prepareDataActinn.R b/R_src/prepareDataActinn.R
new file mode 100644
index 0000000000000000000000000000000000000000..3bf79cedf9e0bdeba3a9ef942ca131ec87a38fd5
--- /dev/null
+++ b/R_src/prepareDataActinn.R
@@ -0,0 +1,64 @@
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+library(Seurat)
+library(getopt)
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'inputRDS',  'i', 1, "character", "REQUIRED: 10X data ordered by monocle (.RDS generated by orderCL.R).",
+  'subsetData', 's', 1, "character", "Population to keep, sep by +",
+  'inputTrain', 't', 1, "character", "Require .rds object of the training dataset",
+  'labelName', 'l', 1, "character", "Name of the label column",
+  'dataSlot', 'd', 1, "character", "Name of the data slot to recover : either counts, data or scale.data",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)'
+), byrow=TRUE, ncol=5);
+
+opt = getopt(spec)
+
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputRDS)) {
+  cat("Gene filtering of a gbm cds ordered. For scenic use, two filters")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+#set default arguments values
+if(is.null(opt$dataSlot)){
+  opt$dataSlot <- "counts"
+}
+
+
+if (is.null(opt$outdir)) {
+  opt$outdir = "./"
+}
+
+# opt$inputRDS <- "/shared/projects/scRNA_HSPC_Aging/scRNA_all_paper_pipeline/output/Seurat3_integration_3_upper_mad_filter_ref_wt/Analysis_new_cluster/annotated_update.rds"
+# opt$inputTrain <- "/shared/projects/scRNA_HSPC_Aging/scRNA_all_paper_pipeline/output/RodriguezPreparation/seuratForCastle.rds"
+# opt$labelName <- "library_id"
+
+dir.create(opt$outdir, recursive = T)
+
+seurat_target <- readRDS(opt$inputRDS)
+seurat_training <- readRDS(opt$inputTrain)
+
+if(!is.null(opt$subsetData)){
+  pop_keep <- strsplit(opt$subsetData, split = "\\+")[[1]]
+  
+  Idents(seurat_training) <- opt$labelName
+  seurat_training <- subset(seurat_training, idents = pop_keep)
+}
+
+seurat_training$cellName <- rownames(seurat_training@meta.data)
+
+# Extract data from the seurat object
+data_target <- as.data.frame(GetAssayData(seurat_target, slot = opt$dataSlot))
+data_training <- as.data.frame(GetAssayData(seurat_training, slot = opt$dataSlot))
+
+write.table(x = seurat_training@meta.data[,c("cellName", opt$labelName)], file = paste0(opt$outdir, opt$labelName, "_training_label", ".txt"), 
+            sep = "\t", col.names = F, row.names = F, quote = F)
+
+write.table(x = data_target, file = paste0(opt$outdir, "seurat_data_target", ".csv"), quote = F, sep = ",")
+write.table(x = data_training, file = paste0(opt$outdir, opt$labelName, "_seurat_data_training", ".csv"), quote = F, sep = ",")
+
diff --git a/R_src/prepareNestorowaCL.R b/R_src/prepareNestorowaCL.R
new file mode 100644
index 0000000000000000000000000000000000000000..d49078d3a319cef5cd0593d941725f730ed14965
--- /dev/null
+++ b/R_src/prepareNestorowaCL.R
@@ -0,0 +1,294 @@
+##------------------------------------------------------------------------------
+## L. Herault
+##------------------------------------------------------------------------------
+
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+
+suppressMessages(library(monocle))
+suppressMessages(library(Seurat))
+suppressMessages(library(scran))
+suppressMessages(library(getopt))
+suppressMessages(library(biomaRt))
+suppressMessages(library(gprofiler2))
+suppressMessages(library(stringr))
+suppressMessages(library(scales))
+suppressMessages(library(grid))
+
+
+
+
+
+#load homemade function
+source("R_src/data_preparation.R")
+
+# R data preparation scripts for Rodiguez-Fraticelli data
+
+## -----------------------------------------------------------------------------
+## Command line args
+## -----------------------------------------------------------------------------
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'inputData',  'i', 1, "character", "REQUIRED: downloaded Nestorowa raw data (.RDS generated by prepare_data.R).",
+  "inputMetaData", "m", 1, "character", "REQUIRED : downloaded Nestorowa meta data",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)',
+  'minPropCellExp', "p",1,"numeric", "Genes expressed in less than this proportion of cells are discarrded frome the analysis (0.001 by default)"
+  ), byrow=TRUE, ncol=5);
+
+opt = getopt(spec)
+
+# if help was asked, print a friendly message
+# and exit with a non-zero error code
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputData)) {
+  cat("Prepare Nestorowa data as source for CaSTLe")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+if (is.null(opt$outdir)) {
+  opt$outdir = "./"
+}
+
+dir.create(opt$outdir,recursive = T,showWarnings = F)
+
+
+if (is.null(opt$minPropCellExp)) {
+  opt$minPropCellExp <- 0.001
+  print(opt$minPropCellExp)
+}
+
+#setwd(opt$outdir)
+
+#get Seurat3 Nestorowa
+rawCountsPath <- opt$inputData
+
+
+rawTable <- read.table(rawCountsPath,sep = "\t",header = T,row.names = 1)
+phenoData0 <- read.table(opt$inputMetaData)
+
+# We kee broad gating discarding MPP_broad column that is sudivided in STHSC, MPP1, MPP2, MPP3
+phenoData <- phenoData0[,which(endsWith(colnames(phenoData0), "broad"))]
+phenoData <- phenoData[,which(!startsWith(colnames(phenoData), "MPP_"))]
+
+# We discard 136 cells with no labels
+
+phenoData <- phenoData[which(rowSums(phenoData)>0),]
+
+# We renamed LT- and LT. in phenoData and rawTable rownames to LTHSC
+unique(str_split_fixed(colnames(rawTable),"_",2)[,1])
+unique(str_split_fixed(rownames(phenoData),"_",2)[,1])
+
+rownames(phenoData)[which(startsWith(rownames(phenoData),"LT-"))] <-gsub("LT-HSC",replacement = "LTHSC",x = rownames(phenoData)[which(startsWith(rownames(phenoData),"LT-"))])
+colnames(rawTable)[which(startsWith(colnames(rawTable),"LT."))] <-gsub("LT.HSC",replacement = "LTHSC",x = colnames(rawTable)[which(startsWith(colnames(rawTable),"LT."))])
+
+rawTable <- rawTable[which(startsWith(rownames(rawTable), "ENSMUS")),]
+
+rawTable <- rawTable[,rownames(phenoData)]
+
+
+
+
+
+
+# We revover gene short name from ensembl ID with features table from seurat
+conversionTable <- gconvert(rownames(rawTable),organism = "mmusculus",target = "MGI",mthreshold = 1,filter_na = T)
+
+rawTable_filter <- rawTable[rownames(rawTable) %in% conversionTable$input,]
+
+# Only needed if ensemble id (it is the case in the workflow)
+
+dupGeneNames <- conversionTable$target[which(duplicated(conversionTable$target))]
+
+write.csv(conversionTable[which(duplicated(conversionTable$target)),],
+            paste(opt$outdir,"/dupGenesName.csv",sep = ""))
+print("Dup gene short names existing, making them unique...")
+# rownames(rawTable) <- make.unique(conversionTable$target)
+
+rownames(rawTable_filter) <- make.unique(conversionTable$target)
+
+
+
+
+
+
+seurat <- CreateSeuratObject(counts = rawTable_filter,meta.data = phenoData)
+
+  #Use of Rodriguez QC to filter poor qualisty cells To pass quality control, 
+  #cells were required to have at least 200.000 reads mapping to nuclear genes, at least 4000 genes detected, less than 10% of mapped reads mapping to mitochondrial genes, 
+  #and less than 50% of mapped reads mapping to the External RNA Controls Consortium (ERCC) spike-ins (#4456740, Life Technologies) 
+min(seurat@meta.data$nFeature_RNA,na.rm = T)
+grep(x = rownames(seurat),pattern = "^Mt-")
+#Filtering seems to be already done
+
+# seurat[["percentMito"]] <- PercentageFeatureSet(seurat, pattern = "^Mt-")
+# seurat <- subset(x = seurat, subset = nFeature_RNA >= 4000 & )
+# seurat <- 
+
+getLab <- function(cell,seurat) {
+  res = which.max(seurat@meta.data[cell,endsWith(colnames(seurat@meta.data),"_broad")])
+  return(names(res))
+}
+
+seurat@meta.data$library_id <- sapply(X = rownames(seurat@meta.data), FUN = getLab,seurat = seurat,simplify = T)
+
+
+## Switch to Monocle object to more easily discard unexpressed genes
+
+pd <- new("AnnotatedDataFrame", data = seurat@meta.data)
+fd <- new("AnnotatedDataFrame", data = data.frame(gene_short_name = rownames(seurat)))
+rownames(fd) <- fd$gene_short_name
+
+
+
+monocleAll <- newCellDataSet(GetAssayData(object = seurat, slot = "counts"),
+                             phenoData = pd,
+                             featureData = fd,
+                             lowerDetectionLimit = 0.1,
+                             expressionFamily = negbinomial.size())
+
+
+
+pData(monocleAll)$Total_mRNAs <- Matrix::colSums(exprs(monocleAll))
+monocleAll <- detectGenes(monocleAll, min_expr = 0.1)
+
+# Filter out genes expressed in too few cells
+fData(monocleAll)$use_for_castle <- fData(monocleAll)$num_cells_expressed > opt$minPropCellExp * ncol(monocleAll)
+
+gbm_to_seurat <- monocleAll[fData(monocleAll)$use_for_castle==T,]
+
+seurat <- CreateSeuratObject(counts = exprs(gbm_to_seurat), meta.data = pData(gbm_to_seurat))
+
+# Need normalized data for castle
+seurat <- NormalizeData(object = seurat)
+
+# Further analysis to highlight some signature
+seurat$generalCellType <- "HSPC"
+seurat@meta.data[seurat@meta.data$library_id %in% c("MEP_broad", "GMP_broad", "CMP_broad"), "generalCellType"] <- "Prog"
+
+# Signature for Prog cellType
+Idents(seurat) <- "generalCellType"
+markers_ProgVSHSPC <- FindMarkers(seurat, ident.1 = "Prog", ident.2 = "HSPC", logfc.threshold = 0.25)
+markers_ProgVSHSPC_sig <- markers_ProgVSHSPC[markers_ProgVSHSPC$p_val_adj < 0.05,]
+markers_ProgVSHSPC_sig <- markers_ProgVSHSPC_sig[order(markers_ProgVSHSPC_sig$avg_log2FC, decreasing = T),]
+
+top_markers_ProgVSHSPC_sig <- markers_ProgVSHSPC_sig[rownames(markers_ProgVSHSPC_sig) %in% head(rownames(markers_ProgVSHSPC_sig), 100),]
+bot_markers_ProgVSHSPC_sig <- markers_ProgVSHSPC_sig[rownames(markers_ProgVSHSPC_sig) %in% tail(rownames(markers_ProgVSHSPC_sig), 100),]
+
+# Signature spÃ© for GMP vs other prog
+Idents(seurat) <- "library_id"
+markers_GMPvsOtherProg <- FindMarkers(seurat, ident.1 = "GMP_broad", ident.2 = c("CMP_broad", "MEP_broad"), logfc.threshold = 0.25)
+markers_GMPvsOtherProg_sig <- markers_GMPvsOtherProg[markers_GMPvsOtherProg$p_val_adj < 0.05,]
+markers_GMPvsOtherProg_sig <- markers_GMPvsOtherProg_sig[order(markers_GMPvsOtherProg_sig$avg_log2FC, decreasing = T),]
+
+top_markers_GMPvsOtherProg_sig <- markers_GMPvsOtherProg_sig[rownames(markers_GMPvsOtherProg_sig) %in% head(rownames(markers_GMPvsOtherProg_sig), 100),]
+bot_markers_GMPvsOtherProg_sig <- markers_GMPvsOtherProg_sig[rownames(markers_GMPvsOtherProg_sig) %in% tail(rownames(markers_GMPvsOtherProg_sig), 100),]
+
+# CMP vs others prog
+markers_CMPvsOtherProg <- FindMarkers(seurat, ident.1 = "CMP_broad", ident.2 = c("GMP_broad", "MEP_broad"), logfc.threshold = 0.25)
+markers_CMPvsOtherProg_sig <- markers_CMPvsOtherProg[markers_CMPvsOtherProg$p_val_adj < 0.05,]
+markers_CMPvsOtherProg_sig <- markers_CMPvsOtherProg_sig[order(markers_CMPvsOtherProg_sig$avg_log2FC, decreasing = T),]
+
+top_markers_CMPvsOtherProg_sig <- markers_CMPvsOtherProg_sig[rownames(markers_CMPvsOtherProg_sig) %in% head(rownames(markers_CMPvsOtherProg_sig), 100),]
+bot_markers_CMPvsOtherProg_sig <- markers_CMPvsOtherProg_sig[rownames(markers_CMPvsOtherProg_sig) %in% tail(rownames(markers_CMPvsOtherProg_sig), 100),]
+
+# MEP vs others prog
+markers_MEPvsOtherProg <- FindMarkers(seurat, ident.1 = "MEP_broad", ident.2 = c("GMP_broad", "CMP_broad"), logfc.threshold = 0.25)
+markers_MEPvsOtherProg_sig <- markers_MEPvsOtherProg[markers_MEPvsOtherProg$p_val_adj < 0.05,]
+markers_MEPvsOtherProg_sig <- markers_MEPvsOtherProg_sig[order(markers_MEPvsOtherProg_sig$avg_log2FC, decreasing = T),]
+
+top_markers_MEPvsOtherProg_sig <- markers_MEPvsOtherProg_sig[rownames(markers_MEPvsOtherProg_sig) %in% head(rownames(markers_MEPvsOtherProg_sig), 100),]
+bot_markers_MEPvsOtherProg_sig <- markers_MEPvsOtherProg_sig[rownames(markers_MEPvsOtherProg_sig) %in% tail(rownames(markers_MEPvsOtherProg_sig), 100),]
+
+sig_markers <- data.frame(Prog_signature_up = rownames(top_markers_ProgVSHSPC_sig),
+                          Prog_signature_down = rownames(bot_markers_ProgVSHSPC_sig),
+                          GMP_signature_up = rownames(top_markers_GMPvsOtherProg_sig),
+                          GMP_signature_down = rownames(bot_markers_GMPvsOtherProg_sig),
+                          CMP_signature_up = rownames(top_markers_CMPvsOtherProg_sig),
+                          CMP_signature_down = rownames(bot_markers_CMPvsOtherProg_sig),
+                          MEP_signature_up = rownames(top_markers_MEPvsOtherProg_sig),
+                          MEP_signature_down = rownames(bot_markers_MEPvsOtherProg_sig))
+
+
+write.table(sig_markers, file = paste0(opt$outdir, "/progenitors_signature", ".csv"), 
+            sep = ",", quote = F, row.names = T, col.names = T)
+
+# # Short seurat analysis without any correction
+# seurat <- FindVariableFeatures(object = seurat,selection.method = "vst", nfeatures = 2000, verbose = T)
+# seurat <- ScaleData(object = seurat)
+# 
+# seurat <- RunPCA(object = seurat)
+# 
+# 
+# png(paste(opt$outdir,"/ElbowPlot.png",sep =""))
+# ElbowPlot(object = seurat,ndims = 30)
+# dev.off()
+# 
+# 
+# print("Clustering...")
+# 
+# seurat <- FindNeighbors(object = seurat,dims = c(1:15),k.param = 20)
+# seurat <- FindClusters(object = seurat,resolution = c(0.5,0.6,0.7,0.8,0.9,1,1.2))
+# 
+# print("Running TSNE...")
+# 
+# seurat <- RunTSNE(seurat,dims = c(1:15))
+# 
+# print("Running UMAP...")
+# 
+# seurat <- RunUMAP(seurat,dims = c(1:15))
+# 
+# print("UMAP ok")
+# print(colnames(seurat@meta.data))
+# 
+# 
+# colPrefix <- "RNA_snn_res."
+# 
+# 
+# umapListRes <- list()
+# tsneListRes <- list()
+# for (r in c(0.6,0.8,1,1.2)) {
+#   umapListRes[[as.character(r)]] <- DimPlot(seurat,
+#                                             reduction = "umap",
+#                                             label = T,
+#                                             group.by = paste(colPrefix,r,sep="")) + 
+#     NoLegend() +
+#     ggtitle(paste("res",r))
+#   
+#   tsneListRes[[as.character(r)]] <- DimPlot(seurat,
+#                                             reduction = "tsne",
+#                                             label = T,
+#                                             group.by = paste(colPrefix,r,sep="")) + 
+#     NoLegend() +
+#     ggtitle(paste("res",r))
+# }
+# 
+# png(paste(opt$outdir,"/tsne_different_res.png",sep = ""),height = 800,width = 800)
+# grid.arrange(tsneListRes[[1]],tsneListRes[[2]],tsneListRes[[3]],tsneListRes[[4]])
+# dev.off()
+# 
+# png(paste(opt$outdir,"/umap_different_res.png",sep = ""),height = 800,width = 800)
+# grid.arrange(umapListRes[[1]],umapListRes[[2]],umapListRes[[3]],umapListRes[[4]])
+# dev.off()
+# 
+# 
+# #Check for unwanted source of variation
+# pPhases <- grid.rect(gp=gpar(col="white"))
+# 
+# 
+# pPred <- DimPlot(seurat,group.by = "library_id")
+# 
+# pUMI <- FeaturePlot(seurat, "Total_mRNAs")
+# pMito <- grid.rect(gp=gpar(col="white"))
+# 
+# png(paste(opt$outdir,"/umap_factors.png",sep = ""),height = 800,width = 800)
+# grid.arrange(pPhases,pPred,pUMI,pMito)
+# dev.off()
+# 
+
+
+
+saveRDS(object = seurat,file = paste(opt$outdir,"/seuratForActinn.rds",sep =""))
diff --git a/R_src/prepareRodriguezCL.R b/R_src/prepareRodriguezCL.R
new file mode 100644
index 0000000000000000000000000000000000000000..2b0e7878ef802d043900aa34d29cc7abd7039332
--- /dev/null
+++ b/R_src/prepareRodriguezCL.R
@@ -0,0 +1,121 @@
+##------------------------------------------------------------------------------
+## L. Herault
+##------------------------------------------------------------------------------
+
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+
+suppressMessages(library(monocle))
+suppressMessages(library(Seurat))
+suppressMessages(library(scran))
+suppressMessages(library(getopt))
+
+
+
+#load homemade function
+source("R_src/data_preparation.R")
+
+# R data preparation scripts for Rodiguez-Fraticelli data
+
+## -----------------------------------------------------------------------------
+## Command line args
+## -----------------------------------------------------------------------------
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'inputData',  'i', 1, "character", "REQUIRED: downloaded Rodriguez raw data (.RDS generated by prepare_data.R).",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)',
+  'minPropCellExp', "p",1,"numeric", "Genes expressed in less than this proportion of cells are discarrded frome the analysis (0.001 by default)"
+  ), byrow=TRUE, ncol=5);
+
+opt = getopt(spec)
+
+# if help was asked, print a friendly message
+# and exit with a non-zero error code
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputData)) {
+  cat("Prepare Rodriguez - Fraticelli data as source for CaSTLe")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+if (is.null(opt$outdir)) {
+  opt$outdir = "./"
+}
+
+dir.create(opt$outdir,recursive = T,showWarnings = F)
+
+
+if (is.null(opt$minPropCellExp)) {
+  opt$minPropCellExp <- 0.001
+  print(opt$minPropCellExp)
+}
+
+#setwd(opt$outdir)
+
+#get Seurat3 Rodriguez
+rawCountsPaths <-list.files(opt$inputData,full.names =T)
+
+#make sure we gate the correct file (no MPP4)
+rawCountsPaths <- rawCountsPaths[which(grepl(pattern = 'LTHSC|MPP2|MPP3|STHSC',rawCountsPaths))]
+
+
+getSeuratRodriguez <- function(rawCountsPath) {
+  
+  rawTable <- read.csv(rawCountsPath)
+  
+  phenoData <- rawTable[,c(1:5)]
+  rownames(phenoData) <- rawTable$cell_id
+  data <- rawTable[,c(6:ncol(rawTable))]
+  rownames(data) <- rawTable$cell_id
+  seurat <- CreateSeuratObject(counts = t(data),meta.data = phenoData)
+  
+  #Use of Rodriguez QC to filter poor qualisty cells
+  seurat <- subset(x = seurat, subset = pass_filter > 0.1)
+  print(unique(seurat@meta.data$seq_run_id))
+  return(seurat)
+  
+}
+
+seuratObjects <- lapply(rawCountsPaths,getSeuratRodriguez)
+
+# We do not take the MPP4
+seuratAll <- merge(x = merge(x = seuratObjects[[1]], y = seuratObjects[[2]]),
+                         y= merge(x = seuratObjects[[3]],y = seuratObjects[[4]]))
+
+seuratAll@meta.data$library_id <- factor(seuratAll@meta.data$library_id,levels = c("LTHSC","STHSC", "MPP2", "MPP3"))
+
+
+## Switch to Monocle object to more easily discard unexpressed genes
+
+pd <- new("AnnotatedDataFrame", data = seuratAll@meta.data)
+fd <- new("AnnotatedDataFrame", data = data.frame(gene_short_name = rownames(seuratAll)))
+rownames(fd) <- fd$gene_short_name
+
+
+
+monocleAll <- newCellDataSet(GetAssayData(object = seuratAll, slot = "counts"),
+                             phenoData = pd,
+                             featureData = fd,
+                             lowerDetectionLimit = 0.1,
+                             expressionFamily = negbinomial.size())
+
+
+
+pData(monocleAll)$Total_mRNAs <- Matrix::colSums(exprs(monocleAll))
+monocleAll <- detectGenes(monocleAll, min_expr = 0.1)
+
+# Filter out genes expressed in too few cells
+fData(monocleAll)$use_for_castle <- fData(monocleAll)$num_cells_expressed > opt$minPropCellExp * ncol(monocleAll)
+
+gbm_to_seurat <- monocleAll[fData(monocleAll)$use_for_castle==T,]
+
+seurat <- CreateSeuratObject(counts = exprs(gbm_to_seurat), meta.data = pData(gbm_to_seurat))
+
+# Need normalized data for castle
+seurat <- NormalizeData(object = seurat)
+
+
+saveRDS(object = seurat,file = paste(opt$outdir,"/seuratForCastle.rds",sep =""))
diff --git a/R_src/prepareSTREAManalysisCL.R b/R_src/prepareSTREAManalysisCL.R
new file mode 100644
index 0000000000000000000000000000000000000000..5864b180b830abe3a41b51100215d683be436e06
--- /dev/null
+++ b/R_src/prepareSTREAManalysisCL.R
@@ -0,0 +1,105 @@
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+
+suppressMessages(library(Seurat))
+suppressMessages(library(RColorBrewer))
+suppressMessages(library(readxl))
+suppressMessages(library(stringr))
+suppressMessages(library(getopt))
+suppressMessages(library(scales))
+
+## -----------------------------------------------------------------------------
+## Command line args
+## -----------------------------------------------------------------------------
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'inputRDS',  'i', 1, "character", "REQUIRED : 10X data prepared as monocle or seurat object.",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)',
+  "prefix", "p", 1, "character", "Prefix for the output file",
+  "downSample", "d", 1,"numeric", "subsample the dataset with the given number of cells",
+  "removeCluster", "r", 1, "character", "remove cell for the given cluster (need allData)",
+  "subsetTreatment", "s", 1, "character", "subset cell for the given treatment (need allData)"
+), byrow=TRUE, ncol=5)
+
+opt = getopt(spec)
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputRDS)) {
+  cat("Prepare data for trajectory inference with stream")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+if (is.null(opt$outdir)) {
+  opt$outdir = "./"
+}
+
+
+dir.create(opt$outdir, recursive = T)
+
+prefix <- opt$prefix
+
+print(prefix)
+
+## ---------------------------------------------------------------------
+## Color scheme
+## ---------------------------------------------------------------------
+
+colorTreatment <- c("#664CFF", "#FF8000")
+colorCluster <- c("#E69F00", "#CC79A7", "#0072B2", "#009E73", "#D55E00", "#56B4E9")
+colorPhases <- c(brewer.pal(9,"RdPu"))[c(3,6,9)]
+# colorBranches <- c("#332288", "#117733", "#DDCC77", "#AA4499", "#88CCEE")
+
+seurat <- readRDS(opt$inputRDS)
+seurat$cellName <- rownames(seurat@meta.data)
+#Set integrated slot as active assay
+DefaultAssay(seurat) <- "integrated"
+
+colorClusters <- cbind(levels(seurat$FinalCluster),
+                       colorCluster)
+
+if (!is.null(opt$removeCluster)) {
+  seurat = subset(seurat, idents = opt$removeCluster,invert = TRUE)
+  colorClusters = colorClusters[!colorClusters[,1] %in% opt$removeCluster,]
+}
+
+
+conditionColors <- cbind(c("Ctrl","RA"),colorTreatment)
+
+phasesColors <- cbind(levels(seurat$phases), colorPhases)
+
+if (!is.null(opt$subsetTreatment)) {
+  Idents(seurat) <- "condition"
+  
+  seurat = subset(seurat, idents = opt$subsetTreatment)
+  conditionColors = conditionColors[which(conditionColors[,1]!=opt$subsetTreatment),]
+  Idents(seurat) <- "FinalCluster"
+  
+}
+
+if (!is.null(opt$downSample)) {
+  seurat = subset(seurat, cells = sample(Cells(seurat), opt$downSample),random.seed = 2020)
+}
+
+# Extract integrated matrix 
+data <- GetAssayData(object = seurat, slot = "scale.data")
+data <- data[,rownames(seurat@meta.data)]
+
+write.table(data,paste(opt$outdir,"/",prefix,"_scaleDataForStream.tsv",sep = ""),quote = FALSE, sep = "\t",col.names = T)
+
+# Write cluster annotation for each cells
+write.table(seurat@meta.data[,c("cellName", "FinalCluster")],paste(opt$outdir,"/",prefix,"_seuratClusters.tsv",sep = ""),quote = FALSE,sep = "\t",col.names = FALSE,row.names = F)
+# Write color associated to each cluster
+write.table(colorClusters,paste(opt$outdir,"/",prefix,"_colorClusters.tsv",sep =""),quote = FALSE,sep = "\t",col.names = FALSE,row.names = F)
+
+# Write condition annot 
+write.table(seurat$condition,paste(opt$outdir,"/",prefix,"_treatment.tsv",sep =""),quote = FALSE,sep = "\t",col.names = FALSE)
+# Write color associated to the condition
+write.table(conditionColors,paste(opt$outdir,"/",prefix,"_colorTreatment.tsv",sep =""),quote = FALSE,sep = "\t",col.names = FALSE,row.names = F)
+
+# Write phases annot
+write.table(seurat$phases,paste(opt$outdir,"/",prefix,"_cellCyclePhases.tsv",sep =""),quote = FALSE,sep = "\t",col.names = FALSE)
+# Write color assosciated to phases
+write.table(phasesColors,paste(opt$outdir,"/",prefix,"_colorCellCyclePhases.tsv",sep =""),quote = FALSE,sep = "\t",col.names = FALSE,row.names = F)
diff --git a/R_src/prepareSeuratCL.R b/R_src/prepareSeuratCL.R
new file mode 100644
index 0000000000000000000000000000000000000000..f17cc4d8cbd9f2efa1072d1e0fb6d62d4873c017
--- /dev/null
+++ b/R_src/prepareSeuratCL.R
@@ -0,0 +1,188 @@
+##------------------------------------------------------------------------------
+## L. Herault
+##------------------------------------------------------------------------------
+
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+
+#Load required packages
+suppressMessages(library(Seurat))
+suppressMessages(library(scran))
+suppressMessages(library(getopt))
+suppressMessages(library(grid))
+suppressMessages(library(gridExtra))
+suppressMessages(library(scales))
+suppressMessages(library(ggplot2))
+
+
+
+#load homemade function
+source("R_src/data_preparation.R")
+
+
+
+# Quality control and preparation of 10X data for analysis
+
+## -----------------------------------------------------------------------------
+## Command line args
+## -----------------------------------------------------------------------------
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'inputRDS',  'i', 1, "character", "REQUIRED: 10X data prepared as a list of seurat object and feature data table (.RDS generated by prepare_data.R).",
+  "mitochFilter", "m",1, "numeric", "mitochondrial protein gene RNA percentage filtering (eg 10 to remove cells with more than 10% of mitoch protein transcripts, default none)",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)',
+  'upperMad',    'u', 1, "numeric", "cells with more than u mads log counts will be discard (default 2)",
+  "lowerMad",   'l', 1, "numeric", "cells with less than l mads log counts will be discard (default 2)"
+), byrow=TRUE, ncol=5);
+
+opt = getopt(spec)
+
+# if help was asked, print a friendly message
+# and exit with a non-zero error code
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputRDS)) {
+  cat("Add cell cycle phases to each cells with scran, filter cells of low quality, control percentage mitochondrial transcripts, ribosomal protein transcripts")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+if (is.null(opt$outdir)) {
+  opt$outdir = "./"
+}
+
+if (is.null(opt$mitochFilter)) {
+  opt$mitochFilter <- 100
+}
+
+if (is.null(opt$mitochFilter)) {
+  opt$upperMad <- 2
+}
+
+if (is.null(opt$mitochFilter)) {
+  opt$lowerMad <- 2
+}
+
+
+input <- readRDS(opt$inputRDS)
+seurat <- input$seurat
+fd <- input$featureData
+# print some stuff 
+
+# Estimating size factor and dispersion not needed anymore
+#gbm_cds <- estimateSizeFactors(gbm_cds)
+#gbm_cds <- estimateDispersions(gbm_cds)
+
+
+# First getting cell cycle phases with cyclone function from package scran (see data_preparation.R))
+seurat <- getCellCyclePhasesSeurat(seurat,outdir = paste(opt$outdir,"/cellCycle",sep =""))
+
+# Change ensembl gene names in the seurat object ot gene short names to be able to use PercentageFeatureSet Seurat function for mt and rb transcript proportion
+#check for dup genes
+
+# Only needed if ensemble id (it is the case in the workflow)
+
+dupGeneNames <- fd[which((duplicated(fd$gene_short_name))),"gene_short_name"]
+
+if(length(dupGeneNames) == 0) {
+  rownames(seurat) <- fData(gbm_to_seurat)$gene_short_name
+  
+} else {
+  #write.csv(fd[which(is.element(fd$gene_short_name,dupGeneNames)),],
+  #          paste(opt$outdir,"/dupGenesName.csv",sep = "")
+  #)
+  print("Dup gene short names existing, making them unique...")
+  newSeuratCount <- as.matrix(GetAssayData(seurat,slot = 'counts',assay = "RNA"))
+  rownames(newSeuratCount) <- make.unique(fd$gene_short_name, sep = "--")
+  seurat <- CreateSeuratObject(counts = newSeuratCount,
+                               assay = "RNA",
+                               meta.data = seurat@meta.data
+                               )
+
+}
+
+# Compute percent Rb and percent mt
+
+seurat$percentMito <- PercentageFeatureSet(
+  seurat,
+  pattern = 'mt-',
+  assay = 'RNA'
+)
+
+
+seurat$percentRibo <- PercentageFeatureSet(
+  seurat,
+  pattern = 'Rps|Rpl|Mrp', #it might be better to have the exact gene set through GO...
+  assay = 'RNA'
+)
+
+# Filtering cells outside two MADs from median
+# print num cells before filtering
+
+print("Matrix dim before cell filtering :")
+dim(seurat)
+
+# plot qc metrics before filtering
+png(paste(opt$outdir,"/qcMetricsRaw.png",sep = ""))
+VlnPlot(seurat, features = c("nFeature_RNA", "nCount_RNA", "percentMito","percentRibo"), ncol = 3,pt.size = 0.001)
+dev.off()
+
+# plot some correlation between qc metrics 
+plot1 <- FeatureScatter(seurat, feature1 = "nCount_RNA", feature2 = "percentMito")
+plot2 <- FeatureScatter(seurat, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
+
+png(paste(opt$outdir,"/qcMetricsCr.png",sep = ""))
+grid.arrange(plot1,plot2)
+dev.off()
+
+
+
+
+minCountThreshold <- exp(median(log(seurat@meta.data$nCount_RNA)) - opt$lowerMad*mad(log(seurat@meta.data$nCount_RNA)))
+maxCountThreshold <- exp(median(log(seurat@meta.data$nCount_RNA)) + opt$upperMad*mad(log(seurat@meta.data$nCount_RNA)))
+
+
+png(paste(opt$outdir,"/cutOffLogCount.png",sep = ""))
+VlnPlot(seurat, features = c( "nCount_RNA"),pt.size = 0.001) + 
+  geom_hline(yintercept = minCountThreshold) +
+  geom_hline(yintercept = maxCountThreshold)
+dev.off()
+
+
+
+seurat <- subset(seurat, subset = nCount_RNA > minCountThreshold &
+                   nCount_RNA < maxCountThreshold & 
+                   percentMito < opt$mitochFilter)
+
+#Check if some cells don't have an assinged cell cycles phases 
+table(!is.na(seurat@meta.data$phases))
+png(paste(opt$outdir,"/assignedPhases.png",sep = ""))
+barplot(table(!is.na(seurat@meta.data$phases)),main="Phases assigned")
+dev.off()
+
+if("FALSE" %in% names(table(!is.na(seurat@meta.data$phases)))) {
+  print("Discard cells with no cell cycle phases assinged (too few genes")
+  print(table(!is.na(seurat@meta.data$phases))[1])
+  seurat@meta.data$phases[is.na(seurat@meta.data$phases)] <- "noAssigned"
+  Idents(seurat) <- "phases"
+  seurat <- subset(seurat, idents = "noAssigned",invert = T)
+  Idents(seurat) <- "orig.ident"
+  
+}
+
+png(paste(opt$outdir,"/ccPhases.png",sep = ""))
+barplot(table(seurat@meta.data$phases),main="Phases")
+dev.off()
+
+# plot qc metrics before filtering
+png(paste(opt$outdir,"/qcMetricsFiltered.png",sep = ""))
+VlnPlot(seurat, features = c("nFeature_RNA", "nCount_RNA", "percentMito","percentRibo"), ncol = 3,pt.size = 0.001)
+dev.off()
+
+print("Matrix dim after cell filtering :")
+print(dim(seurat))
+
+
+saveRDS(object = seurat,file = paste(opt$outdir,"/seurat_treated.rds",sep =""))
diff --git a/R_src/regulonMarkerCL.R b/R_src/regulonMarkerCL.R
new file mode 100644
index 0000000000000000000000000000000000000000..e42cac61936c2535000cdead69bd0010fe25443a
--- /dev/null
+++ b/R_src/regulonMarkerCL.R
@@ -0,0 +1,379 @@
+##------------------------------------------------------------------------------
+## L. Herault
+##------------------------------------------------------------------------------
+
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+
+suppressMessages(library(getopt))
+suppressMessages(library(stringr))
+suppressMessages(library(plyr))
+suppressMessages(library(reshape2))
+suppressMessages(library(Seurat))
+suppressMessages(library(ggplot2))
+
+
+
+source("R_src/computeDiffFun.R")
+
+
+# Analysis of regulon markers of Seurat cluster
+
+## -----------------------------------------------------------------------------
+## Command line args
+## -----------------------------------------------------------------------------
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  "inputSeurat",  "i",1, "character", "input seurat object with cluster column names numclust",
+  "inputMonocle", "m",1, "character", "input monocle object with state column named State",
+  "regulonScore", "r",1, "character", "input regulon score matrix (AUCell)",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)',
+  "tfNode",         't',1, "character", "path to TF.txt to analyse in details",
+  "cellCycle",   'c', 0, "logical", "Add cell cycle genes in the heatmap RNA analysis",
+  "scoreDiff", "s",  1, "numeric", "Score differences threshold to select the markers",
+  "conditions",  "a", 1,  "character", "name of meta cols, separated by +, (eg: AGE+Genotype) for a condition diff test per clusters",
+  "groupingVar", "g", 1, "character", "grouping variable (eg sequencing platform) for condition test to discard batch effect"
+), byrow=TRUE, ncol=5);
+
+
+
+opt = getopt(spec)
+
+
+# if help was asked, print a friendly message
+# and exit with a non-zero error code
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputSeurat)) {
+  cat("Create influence graph from regulon table")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+
+# For testing
+# setwd("/shared/projects/scRNA_HSPC_Aging/scRNA_infer/")
+# selectedTF <- c("Junb","Stat1","Irf1","Irf9","Myc","Gata2","Gata1","Spi1","Bclaf1","Cebpa","Tal1","Ikzf1","Fli1","Klf1","Zfpm1")
+# write.table(selectedTF,"input/selectedTF.txt",sep = "\t",row.names = F,col.names = F)
+# 
+# opt <- list()
+# opt$outdir <- "output/regulonAnalysis/"
+# opt$inputSeurat <- "../herault_et_al/scHSC_herault/report/seurat_report.rds"
+# opt$inputMonocle <- "../herault_et_al/scHSC_herault/report/monocle_report.rds"
+# opt$tfNode <- "input/selectedTF.txt"
+# opt$regulonScore <- "output/ScenicRNA_multipleRuns/AUCell_maskDropouts_save/regulons_enrichment.csv"
+# opt$scoreDiff <- 0.002
+# opt$cellCycle <- T
+
+# set deafault arg
+if (is.null(opt$scoreDiff)) {
+  opt$scoreDiff <- 0.002
+}
+
+if(is.null(opt$outdir)) {
+  opt$outdir <- "./"
+}
+
+if(is.null(opt$cellCycle)) {
+  opt$cellCycle <- F
+}
+
+selectedTF <- as.character(read.table(opt$tfNode)$V1)
+
+
+# Printing option before running
+
+for (o in names(opt)) {
+  print(paste(o,":", opt[[o]]))
+}
+
+# loading R objects
+
+seurat <- readRDS(opt$inputSeurat)
+#monocle <- readRDS(opt$inputMonocle)
+
+# Loading regulon score table
+regulonTable <- read.csv(opt$regulonScore,row.names = 1,check.names = F)
+
+# For the moment only analyse postive regulons
+
+regulonTable <- regulonTable[,colnames(regulonTable)[which(endsWith(colnames(regulonTable),suffix = "+)"))]]
+
+# Adding regulon scores to metadata
+
+# store the data in a new assay in the seurat object of ordered cells
+auCell_data <- t(regulonTable)
+
+seurat[["AUCell"]] <- CreateAssayObject(data=auCell_data)
+
+
+#find cluster regulon markers
+Idents(seurat) <- "numclust"
+
+#Set AUCell slot 
+DefaultAssay(seurat) <- "AUCell"
+
+
+clusterRegulon <- FindAllMarkers(seurat,
+                                 only.pos = T,
+                                 logfc.threshold= 0,
+                                 pseudocount.use = 1,
+                                 min.pct = 0.1)
+
+# Compute true mean difference in score because Seurat comput only logFC
+# The featureDiff functions is loaded from ../R_src/computeDiffFun.R file
+
+clusterRegulon$avg_diff <- NA
+
+for (rm in rownames(clusterRegulon)) {
+  feature <- clusterRegulon[rm,"gene"]
+  cells.1 <- colnames(seurat)[which(seurat$numclust == clusterRegulon[rm,"cluster"])]
+  cells.2 <- colnames(seurat)[which(seurat$numclust != clusterRegulon[rm,"cluster"])]
+  clusterRegulon[rm,"avg_diff"] <- featureDiff(seurat,cells.1,cells.2,feature)
+}
+
+# cut off on p adjusted value
+clusterRegulon <- clusterRegulon[which(clusterRegulon$p_val_adj < 0.05 & abs(clusterRegulon$avg_diff) > opt$scoreDiff),]
+
+
+# rename some columns
+colnames(clusterRegulon) <- c("p_val","avg_logFC","pct.1","pct.2","p_val_adj","cluster","regulon","avg_diff")
+
+# order columns
+clusterRegulon <- clusterRegulon[,c("p_val","pct.1","pct.2","p_val_adj","cluster","regulon","avg_diff")]
+
+# Average heatmap and binarization per clust
+
+
+DefaultAssay(seurat) <- "AUCell"
+
+selectedTF <- selectedTF[paste0(c(selectedTF),"(+)")%in% rownames(seurat)]
+
+
+avgClustAuc <- AverageExpression(
+  seurat,
+  #slot = "data",
+  assays = "AUCell",
+  features = paste0(c(selectedTF),"(+)"),
+  return.seurat = T
+)
+
+png(paste(opt$outdir,"/heatmapAvgAucell.png",sep =""),height = 800,width = 800)
+DoHeatmap(avgClustAuc,features = paste0(selectedTF,"(+)")) 
+dev.off()
+
+binarizeScaledMatrix <- function(seuratAvg, assay = "RNA") {
+  selectedTFMatrix <- GetAssayData(seuratAvg,assay = assay,slot = "scale.data")
+  selectedTFMatrixBin <- selectedTFMatrix
+  selectedTFMatrixBin[] <- NA
+  selectedTFMatrixBin[selectedTFMatrix < -1] <- 0
+  selectedTFMatrixBin[selectedTFMatrix > 1] <- 1
+  return(selectedTFMatrixBin)
+}
+
+selectedTFMatrixBin <- binarizeScaledMatrix(avgClustAuc,assay = "AUCell")
+
+
+write.table(selectedTFMatrixBin,paste(opt$outdir,"/BinAvgClustAucell.tsv",sep =""),sep = "\t")
+
+# same with RNA level
+DefaultAssay(seurat) <- "RNA"
+
+selectedTF <- as.character(read.table(opt$tfNode)$V1)
+
+selectedTF <- selectedTF[selectedTF %in% rownames(seurat)]
+
+
+CDK46CycDGenes <- c("Cdk4","Cdk6","Ccnd1","Ccnd2","Ccnd3")
+CIPKIPGenes  <- c("Cdkn1b","Cdkn1a","Cdkn1c")
+INK4Genes  <-c("Cdkn2a","Cdkn2b","Cdkn2c","Cdkn2d")
+
+cellCycleGenes <- c(CDK46CycDGenes ,CIPKIPGenes,INK4Genes)
+
+cellCycleGenes <- cellCycleGenes[cellCycleGenes%in% rownames(seurat)]
+
+if (opt$cellCycle) {
+  avgClustRNA <- AverageExpression(
+    seurat,
+    #slot = "data",
+    assays = "RNA",
+    features = c(selectedTF,cellCycleGenes),
+    return.seurat = T
+  )
+  
+  avgClustRNACC <- data.frame(GetAssayData(avgClustRNA,assay = "RNA",slot = "data"))
+  CIPKIP <- colSums(avgClustRNACC[CIPKIPGenes[CIPKIPGenes %in% rownames(avgClustRNACC)],])
+  INK4 <- colSums(avgClustRNACC[INK4Genes[INK4Genes %in% rownames(avgClustRNACC)],])
+  CDK46CycD <- colSums(avgClustRNACC[CDK46CycDGenes[CDK46CycDGenes %in% rownames(avgClustRNACC)],])
+  avgClustRNACC <- rbind(avgClustRNACC,CIPKIP=CIPKIP)
+  avgClustRNACC <- rbind(avgClustRNACC,INK4=INK4)
+  avgClustRNACC <- rbind(avgClustRNACC,CDK46CycD=CDK46CycD)
+  avgClustRNACC <- avgClustRNACC[c(selectedTF,c("CIPKIP","INK4","CDK46CycD")),]
+  
+  avgClustRNACC_seurat <- CreateSeuratObject(counts = avgClustRNACC)
+  avgClustRNACC_seurat <- ScaleData(avgClustRNACC_seurat)
+  Idents(avgClustRNACC_seurat) <- colnames(avgClustRNACC_seurat)
+  
+  
+  png(paste(opt$outdir,"/heatmapAvgRNA.png",sep =""),height = 800,width = 800)
+  DoHeatmap(avgClustRNACC_seurat,features = c(selectedTF,c("CIPKIP","INK4","CDK46CycD")))
+  dev.off()
+  
+  selectedTFMatrixBinRNA <- binarizeScaledMatrix(avgClustRNACC_seurat,assay = "RNA")
+  write.table(selectedTFMatrixBinRNA,paste(opt$outdir,"/BinAvgClustRNA.tsv",sep =""),sep = "\t")
+  
+  
+  
+} else {
+  png(paste(opt$outdir,"/heatmapAvgRNA.png",sep =""),height = 800,width = 800)
+  DoHeatmap(avgClustRNA,features = selectedTF) 
+  dev.off()
+  
+  selectedTFMatrixBinRNA <- binarizeScaledMatrix(avgClustRNA,assay = "RNA")
+  write.table(selectedTFMatrixBinRNA,paste(opt$outdir,"/BinAvgClustRNA.tsv",sep =""),sep = "\t")
+  
+}
+
+rownames(selectedTFMatrixBin) <- selectedTF[paste0(selectedTF,"(+)") %in% rownames(selectedTFMatrixBin)]
+selectedTFMatrixBin <- rbind(selectedTFMatrixBin,selectedTFMatrixBinRNA[c(selectedTF[!selectedTF %in% rownames(selectedTFMatrixBin)],"CIPKIP","INK4","CDK46CycD"),])
+consensusMatrixBin <- selectedTFMatrixBin
+consensusMatrixBin[] <- NA
+consensusMatrixBin[which(selectedTFMatrixBin == selectedTFMatrixBinRNA)] <- selectedTFMatrixBin[which(selectedTFMatrixBin == selectedTFMatrixBinRNA)]
+
+consensusMatrixBinHm <- melt(consensusMatrixBin)
+
+write.table(consensusMatrixBin,paste(opt$outdir,"/binMatConsensus.tsv",sep =""),sep = "\t")
+
+png(paste(opt$outdir,"/heatmapConsensus.png",sep =""),height = 800,width = 800)
+ggplot(consensusMatrixBinHm,aes(x=Var2, y=Var1, fill=value)) + scale_fill_gradient(low="blue",high = "red") +
+  geom_tile()          
+dev.off()
+
+###########################################################################################################################
+
+if (!is.null(opt$conditions)) {
+  conditions <- strsplit(opt$conditions,split = "\\+")[[1]]
+  for (c in conditions) {
+    ## only implemented for a meta col AGE for the moment
+    if (!is.null(opt$groupingVar)) {
+      DefaultAssay(seurat) <- "AUCell"
+      
+      
+      
+      regulonDiffPerClust <- lapply(levels(seurat$numclust),
+                                    FindMarkerPerClustGroupVar,
+                                    seurat,
+                                    condition =c,
+                                    grouping.var = "platform",
+                                    identCol = "numclust",
+                                    test.use = "wilcox",
+                                    keepDiverging= T,
+                                    logfc.threshold = 0,
+                                    pseudocount.use = 1,
+                                    min.pct = 0.1,
+                                    filterOnPadj = F,
+                                    computeTrueDiff = T)
+      
+      
+      names(regulonDiffPerClust) <- levels(seurat$numclust)
+      
+      regulonDiffPerClustTable <- regulonDiffPerClust[[1]]
+      for (s in levels(seurat$numclust)[-1]) {
+        regulonDiffPerClustTable <- rbind(regulonDiffPerClustTable,regulonDiffPerClust[[s]])
+      }
+      
+      dim(regulonDiffPerClustTable)
+      
+      table(regulonDiffPerClustTable$Cluster)
+      
+      #Exclude AUCell score differences with a combined pval < 0.05
+      regulonDiffPerClustTable <- regulonDiffPerClustTable[which(regulonDiffPerClustTable$minimump_p_val < 0.05),]
+      
+      dim(regulonDiffPerClustTable)
+      
+      
+      
+      
+      dim(regulonDiffPerClustTable)
+      
+      #Exclude AUCell score differences with an opposite sign of variation (pval set to NA by FindAgingMarkers)
+      regulonDiffPerClustTable <- na.exclude(regulonDiffPerClustTable)
+      
+      dim(regulonDiffPerClustTable)
+      
+      
+      
+      colnames(regulonDiffPerClustTable)[which(colnames(regulonDiffPerClustTable) == "Cluster")] <- "group"
+      colnames(regulonDiffPerClustTable)[which(colnames(regulonDiffPerClustTable) == "Gene")] <- "regulon"
+      
+      
+      
+      regulonDiffPerClustTable <- regulonDiffPerClustTable[,c("regulon","numclust","group","minimump_p_val","max_pval","min_avg_diff","A_p_val","avg_diff_A","A_pct.1","A_pct.2","B_p_val","avg_diff_B","B_pct.1","B_pct.2")]
+      
+      
+      
+      
+      # Filter on average differences before writing final table
+      regulonDiffPerClustTable <- regulonDiffPerClustTable[which(abs(regulonDiffPerClustTable$min_avg_diff) > opt$scoreDiff),]
+      
+      write.table(regulonDiffPerClustTable,paste(opt$outdir,"/",c,"DiffRegulonTable.tsv",sep =""),sep = "\t")
+    } else {
+      
+      DefaultAssay(seurat) <- "AUCell"
+      
+      regulonDiffPerClust <- lapply(levels(seurat$numclust),
+                                    FindMarkerPerClust,
+                                    seurat,
+                                    condition = c,
+                                    identCol = "numclust",
+                                    test.use = "wilcox",
+                                    logfc.threshold = 0,
+                                    pseudocount.use = 1,
+                                    min.pct = 0.1,
+                                    computeTrueDiff = T)
+      
+      names(regulonDiffPerClust) <- levels(seurat$numclust)
+      
+      regulonDiffPerClustTable <- regulonDiffPerClust[[1]]
+      for (s in levels(seurat$numclust)[-1]) {
+        regulonDiffPerClustTable <- rbind(regulonDiffPerClustTable,regulonDiffPerClust[[s]])
+      }
+      
+      dim(regulonDiffPerClustTable)
+      
+      table(regulonDiffPerClustTable$Cluster)
+      
+      #Exclude AUCell score differences with an adjusted pval < 0.05
+      regulonDiffPerClustTable <- regulonDiffPerClustTable[which(regulonDiffPerClustTable$p_val_adj < 0.05),]
+      
+      dim(regulonDiffPerClustTable)
+      
+      colnames(regulonDiffPerClustTable)[which(colnames(regulonDiffPerClustTable) == "Cluster")] <- "group"
+      colnames(regulonDiffPerClustTable)[which(colnames(regulonDiffPerClustTable) == "Gene")] <- "regulon"
+      
+      
+      
+      regulonDiffPerClustTable <- regulonDiffPerClustTable[,c("regulon","numclust","group","p_val_adj","p_val","avg_diff","avg_logFC","pct.1","pct.2")]
+      
+      
+      
+      
+      # Filter on average differences before writing final table
+      regulonDiffPerClustTable <- regulonDiffPerClustTable[which(abs(regulonDiffPerClustTable$avg_diff) > opt$scoreDiff),]
+      
+      
+      
+    }
+    write.table(regulonDiffPerClustTable,paste(opt$outdir,"/",c,"DiffRegulonTable.tsv",sep =""),sep = "\t",row.names = F)
+  }
+}
+
+
+saveRDS(seurat,paste0(opt$outdir,"/seuratAUC.rds"))
+
+write.table(clusterRegulon,paste(opt$outdir,"/clusterMarkerRegulonTable.tsv",sep =""),sep = "\t",row.names = F)
+
+
+
diff --git a/R_src/seurat4AtacRnaCL.R b/R_src/seurat4AtacRnaCL.R
new file mode 100644
index 0000000000000000000000000000000000000000..67bc200c25ba7ae136541d8f5590d0bff07bd83b
--- /dev/null
+++ b/R_src/seurat4AtacRnaCL.R
@@ -0,0 +1,84 @@
+
+CorPCPlot <- function(object, assay = NULL,reduction = "lsi", n = 10, metaCol, ...) 
+{
+  #assay <- SetIfNull(x = assay, y = DefaultAssay(object = object))
+  dr <- object[[reduction]]
+  embed <- Embeddings(object = dr)
+  counts <- object[[metaCol]]
+  embed <- embed[rownames(x = counts), ]
+  #n <- SetIfNull(x = n, y = ncol(x = embed))
+  embed <- embed[, seq_len(length.out = n)]
+  depth.cor <- as.data.frame(cor(x = embed, y = counts))
+  depth.cor$counts <- depth.cor[, 1]
+  depth.cor$Component <- seq_len(length.out = nrow(x = depth.cor))
+  p <- ggplot(depth.cor, aes(Component, counts)) + geom_point() + 
+    scale_x_continuous(n.breaks = n, limits = c(1, n)) + 
+    ylab("Correlation") + ylim(c(-1, 1)) + theme_light() + 
+    ggtitle("Correlation between depth and reduced dimension components", 
+            subtitle = paste0("Assay: ", assay, "\t", "Reduction: ", 
+                              reduction))
+  return(p)
+}
+
+
+rna <- readRDS("output/RNA/PLZF_RARA_CT/Seurat4/seurat.rds")
+atac <- readRDS("output/ATAC/smp/Ctrl/atac.rds")
+DimPlot(atac)
+
+
+##Data preprocessing
+
+## Here, we process the gene activity matrix in order to find anchors between cells in the scATAC-seq dataset and the scRNA-seq dataset.
+
+DefaultAssay(atac) <- "activities"
+atac <- FindVariableFeatures(atac)
+atac <- NormalizeData(atac)
+atac <- ScaleData(atac)
+
+transfer.anchors <- FindTransferAnchors(reference = rna, query = atac, features = VariableFeatures(object = rna), 
+                                        reference.assay = "RNA", query.assay = "activities", reduction = "cca")
+
+## To improve discard first integratedLSI ?
+celltype.predictions <- TransferData(anchorset = transfer.anchors, refdata = rna$numclust, dims = 1:30,
+                                     weight.reduction = atac[["lsi"]])
+atac <- AddMetaData(atac, metadata = celltype.predictions)
+
+hist(atac$prediction.score.max)
+abline(v = 0.5, col = "red")
+
+atac.filtered <- subset(atac, subset = prediction.score.max > 0.5)
+atac.filtered$predicted.id <- factor(atac.filtered$predicted.id, levels = levels(rna))  # to make the colors match
+p1 <- DimPlot(atac.filtered, group.by = "predicted.id", label = TRUE, repel = TRUE) + ggtitle("scATAC-seq cells") + 
+  NoLegend() + scale_colour_hue(drop = FALSE)
+p2 <- DimPlot(rna, group.by = "numclust", label = TRUE, repel = TRUE) + ggtitle("scRNA-seq cells") + 
+  NoLegend()
+p1 + p2
+
+
+# note that we restrict the imputation to variable genes from scRNA-seq, but could impute the
+# full transcriptome if we wanted to
+genes.use <- VariableFeatures(rna)
+refdata <- GetAssayData(rna, assay = "RNA", slot = "data")[genes.use, ]
+
+# refdata (input) contains a scRNA-seq expression matrix for the scRNA-seq cells.  imputation
+# (output) will contain an imputed scRNA-seq matrix for each of the ATAC cells
+## To improve discard first integratedLSI ?
+imputation <- TransferData(anchorset = transfer.anchors, refdata = refdata, weight.reduction = atac[["lsi"]])
+
+# this line adds the imputed data matrix to the atac object
+atac[["RNA"]] <- imputation
+coembed <- merge(x = rna, y = atac)
+
+# Finally, we run PCA and UMAP on this combined object, to visualize the co-embedding of both
+# datasets
+coembed <- ScaleData(coembed, features = genes.use, do.scale = FALSE)
+coembed <- RunPCA(coembed, features = genes.use, verbose = FALSE)
+coembed <- RunUMAP(coembed, dims = 1:30)
+coembed$celltype <- ifelse(!is.na(coembed$numclust), coembed$numclust, coembed$predicted.id)
+
+
+p1 <- DimPlot(coembed, group.by = "sampleName")
+p2 <- DimPlot(coembed, group.by = "celltype", label = TRUE, repel = TRUE)
+p1 + p2
+
+
diff --git a/R_src/seuratAnalysisCL.R b/R_src/seuratAnalysisCL.R
new file mode 100644
index 0000000000000000000000000000000000000000..0e7645c45f877f73dcb80611dbfeb13e35ce5761
--- /dev/null
+++ b/R_src/seuratAnalysisCL.R
@@ -0,0 +1,388 @@
+### Supplemental analysis and cluster annotation
+
+
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+
+suppressMessages(library(Seurat))
+suppressMessages(library(ggplot2))
+library(plyr)
+library(cowplot)
+library(ggplot2)
+library(RColorBrewer)
+library(scales)
+library(gProfileR)
+library(biomaRt)
+library(getopt)
+
+## -----------------------------------------------------------------------------
+## Load function
+## -----------------------------------------------------------------------------
+
+source("R_src/funForSeuratAnalysis.R")
+
+## -----------------------------------------------------------------------------
+## Command line args
+## -----------------------------------------------------------------------------
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  "inputSeurat",  "i",1, "character", "input seurat object with cluster column names numclust",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)',
+  "resolution", "r", 1, "character", "clustering resolution to use",
+  "SigList", "s", 1, "character", "Path to the directory with all the signatures (.txt)",
+  "clustName", "c", 1, "character", "List of cluster name sep by +",
+  "logfc_threshold", "f", 1, "numeric", "Set a logFC threshold",
+  "pval", "p", 1, "numeric", "Set a pval adj threshold",
+  "assay", "a", 1, "character", "Chose default assay"
+), byrow=TRUE, ncol=5);
+
+
+
+opt = getopt(spec)
+
+# if help was asked, print a friendly message
+# and exit with a non-zero error code
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputSeurat)) {
+  cat("Create influence graph from regulon table")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+## ---------------------------------------------------------------------
+## Read inputs
+## ---------------------------------------------------------------------
+
+# # Testing opt
+# opt$inputSeurat <- "/shared/projects/scRNA_HSPC_Aging/GMP_PRARA/output/RNA/Seurat4_integration/combined.rds"
+# opt$outdir <- "/shared/projects/scRNA_HSPC_Aging/GMP_PRARA/output/RNA/Seurat4_integration/Analysis/"
+# opt$resolution <- "integrated_snn_res.0.3"
+# opt$assay <- "RNA"
+# opt$SigList <- "/shared/projects/scRNA_HSPC_Aging/GMP_PRARA/database/Signature/"
+# opt$clustName <- "Neu2+NeuRA1+Rep+NeuRA2+Neu1+Neu3"
+
+# Read seurat object
+seurat <- readRDS(opt$inputSeurat)
+
+outdir <- opt$outdir
+
+# Set logFC threshold
+if (is.null(opt$logfc_threshold)) {
+  opt$logfc_threshold <- 0.25
+}
+
+# Set logFC threshold
+if (is.null(opt$pval)) {
+  opt$pval <- 0.05
+}
+
+# Define ref clustering
+if(is.null(opt$resolution)){
+  opt$resolution <- "integrated_snn_res.0.3"
+}
+targetRes <- opt$resolution
+
+# Define default assay
+if(is.null(opt$assay)){
+  opt$assay <- "RNA"
+}
+
+DefAssay <- opt$assay
+
+# Prepare cluster name
+clustName <- strsplit(opt$clustName, split = "\\+")[[1]]
+
+# Read signature list
+signature_files <- list.files(opt$SigList)
+
+signature_names <- strsplit(signature_files, split = ".txt")
+
+signature_list <- list()
+for(i in 1:length(signature_files)){
+  name <- signature_names[[i]]
+  print(name)
+  signature_list[[name]] <- assign(signature_names[[i]],
+                                   read.table(paste0(opt$SigList, signature_files[i]))[,1])
+}
+
+signature_list_mouse <- lapply(signature_list, convertHumanGeneList)
+
+theme_set(theme_classic())
+dir.create(outdir, recursive = T)
+
+## ---------------------------------------------------------------------
+## Color scheme
+## ---------------------------------------------------------------------
+
+colorTreatment <- c("#664CFF", "#FF8000")
+colorCluster <- c("#E69F00", "#CC79A7", "#0072B2", "#009E73", "#D55E00", "#56B4E9")
+colorPhases <- c(brewer.pal(9,"RdPu"))[c(3,6,9)]
+colorBranches <- c("#332288", "#117733", "#DDCC77", "#AA4499", "#88CCEE")
+
+##-------------------------------------------------#
+##       Data preparation : Clust name etc
+##-------------------------------------------------#
+
+DefaultAssay(seurat) <- DefAssay
+
+Idents(seurat) <- targetRes
+
+names(clustName) <- levels(seurat)
+
+seurat <- RenameIdents(seurat, clustName)
+seurat@meta.data$FinalCluster <- Idents(seurat)
+
+seurat@meta.data$FinalCluster <- factor(seurat@meta.data$FinalCluster, levels = c("Rep", "Neu1", "Neu2", "Neu3", "NeuRA1", "NeuRA2"))
+
+Idents(seurat) <- "FinalCluster"
+
+seurat$Treatment_clust <- paste(seurat$FinalCluster, seurat$condition, sep = "_") 
+seurat@meta.data$phases <- factor(seurat@meta.data$phases, levels = c("G1_G0", "S", "G2_M"))
+
+##--------------------------------------------------#
+##   Looking for cluster markers with new names
+##--------------------------------------------------#
+
+Idents(seurat) <- "FinalCluster"
+markers_cluster <- FindAllMarkers(seurat, logfc.threshold = opt$logfc_threshold)
+markers_cluster_sig <- markers_cluster[markers_cluster$p_val_adj < opt$pval,]
+
+write.table(x = markers_cluster_sig, file = paste0(outdir, "Markers_table.tsv"), quote = F, col.names = T, row.names = F, sep = "\t")
+
+##-----------------------------------------------------#
+##   Looking for geneSet enrichment on cluster markers
+##-----------------------------------------------------#
+
+enrich_clust <- list()
+for(cluster in unique(markers_cluster$cluster)){
+  print(cluster)
+  enrich_clust[[cluster]] <- gProfileR::gprofiler(markers_cluster[markers_cluster$cluster %in% cluster, "gene"],
+                                                  organism = "mmusculus",
+                                                  custom_bg = rownames(seurat),
+                                                  ordered_query = "none")
+  enrich_clust[[cluster]]$cluster <- cluster
+}
+enrich_clust.df <- do.call("rbind", enrich_clust)
+
+write.table(enrich_clust.df, file = paste0(outdir, "enrichmentTable.tsv"), sep = "\t", row.names = F, col.names = T, quote = F)
+
+
+#-------------------------------------------------#
+#              Basic UMAP plotting
+#-------------------------------------------------#
+dir.create(paste0(outdir, "UMAP/"), recursive = T)
+
+UMAP_Clust <- DimPlot(seurat, pt.size = 0.4, cols = colorCluster, group.by = "FinalCluster") + theme(axis.title = element_blank(), axis.text = element_blank())
+UMAP_treatment <- DimPlot(seurat, split.by = "condition", group.by = "condition", pt.size = 0.4, cols = colorTreatment) + theme(axis.title = element_blank(), axis.text = element_blank())
+
+# UMAP treatment split
+Idents(seurat) <- "condition"
+seurat_control <- subset(seurat, idents = "Control")
+seurat_treated <- subset(seurat, idents = "Treated")
+Idents(seurat) <- "FinalCluster"
+
+UMAP_control <- DimPlot(seurat_control, split.by = "condition", group.by = "condition", pt.size = 0.4, cols = colorTreatment[1]) + theme(axis.title = element_blank(), axis.text = element_blank(), legend.position = "none")
+UMAP_treated <- DimPlot(seurat_treated, split.by = "condition", group.by = "condition", pt.size = 0.4, cols = colorTreatment[2]) + theme(axis.title = element_blank(), axis.text = element_blank(), legend.position = "none")
+
+UMAP_CellCycle <- DimPlot(seurat, pt.size = 0.4, cols = colorPhases, group.by = "phases") + theme(axis.title = element_blank(), axis.text = element_blank())
+UMAP_CellCycle_split <- DimPlot(seurat, split.by = "condition", group.by = "phases", pt.size = 0.4, cols = colorPhases) + theme(axis.title = element_blank(), axis.text = element_blank())
+
+ggsave(paste0(outdir,"UMAP/", 'UMAP_CellCycle', '.png'), plot = UMAP_CellCycle, device = 'png', path = NULL,
+       scale = 1, width = 15, height = 15, units = 'cm', dpi = 300)
+
+ggsave(paste0(outdir,"UMAP/", 'UMAP_split_CellCycle', '.png'), plot = UMAP_CellCycle_split, device = 'png', path = NULL,
+       scale = 1, width = 20, height = 15, units = 'cm', dpi = 300)
+ggsave(paste0(outdir,"UMAP/", 'UMAP_cluster', '.png'), plot = UMAP_Clust, device = 'png', path = NULL,
+       scale = 1, width = 15, height = 15, units = 'cm', dpi = 300)
+
+ggsave(paste0(outdir, "UMAP/", 'UMAP_split_treatment', '.png'), plot = UMAP_treatment, device = 'png', path = NULL,
+       scale = 1, width = 20, height = 15, units = 'cm', dpi = 300)
+ggsave(paste0(outdir, "UMAP/", 'UMAP_treatment_Control', '.png'), plot = UMAP_control, device = 'png', path = NULL,
+       scale = 1, width = 15, height = 15, units = 'cm', dpi = 300)
+ggsave(paste0(outdir, "UMAP/", 'UMAP_treatment_Treated', '.png'), plot = UMAP_treated, device = 'png', path = NULL,
+       scale = 1, width = 15, height = 15, units = 'cm', dpi = 300)
+
+#-------------------------------------------------#
+# Generate the barplot for cell distrib in cluster
+#-------------------------------------------------#
+dir.create(paste0(outdir, "BarPlot/"), recursive = T)
+#BP of cell cycle distrib per cluster
+summary_phases <- ddply(seurat@meta.data,~FinalCluster + phases + condition, nrow)
+
+bp_phasesClust_control <- ggplot(data.frame(summary_phases[summary_phases$condition %in% "Control",]), aes(fill = phases,y = V1, x=FinalCluster)) +
+  geom_bar( stat="identity", position="fill")+
+  scale_fill_manual( values= colorPhases)+
+  scale_y_continuous(name = "Sample (%)", labels = c(0,25,50,75,100))+
+  ylab(label = "")+xlab(label = "") + coord_flip()+
+  theme(legend.title=element_blank()) + ggtitle("Control")
+
+bp_phasesClust_treated <- ggplot(data.frame(summary_phases[summary_phases$condition %in% "Treated",]), aes(fill = phases,y = V1, x=FinalCluster)) +
+  geom_bar( stat="identity", position="fill")+
+  scale_fill_manual( values= colorPhases)+
+  scale_y_continuous(name = "Sample (%)", labels = c(0,25,50,75,100))+
+  ylab(label = "")+xlab(label = "") + coord_flip()+
+  theme(legend.title=element_blank()) + ggtitle("Treated")
+
+cellCycle_legend <- get_legend(bp_phasesClust_treated)
+
+bp_grid_cellCycle <- plot_grid(bp_phasesClust_control + theme(legend.position = "none"), bp_phasesClust_treated + theme(legend.position = "none"))
+bp_cellCycle_leg_grid <- plot_grid(bp_grid_cellCycle, cellCycle_legend,  rel_widths = c(3,0.4))
+
+ggsave(paste0(outdir, "BarPlot/", 'BP_cellCycle_split', '.png'), plot = bp_cellCycle_leg_grid, device = 'png', path = NULL,
+       scale = 1, width = 20, height = 20, units = 'cm', dpi = 300)
+
+CellPerClust <- ddply(seurat@meta.data,~FinalCluster + condition, nrow)
+
+bp_rawCellCount_control <- ggplot(data.frame(CellPerClust[CellPerClust$condition %in% "Control",]), aes(y = V1, x=FinalCluster)) +
+  geom_bar( stat="identity") + coord_flip() + ylab("nCells") + expand_limits(y = c(NULL, 3000))
+bp_rawCellCount_treated <- ggplot(data.frame(CellPerClust[CellPerClust$condition %in% "Treated",]), aes(y = V1, x=FinalCluster)) +
+  geom_bar( stat="identity") + coord_flip() + ylab("nCells") + expand_limits(y = c(NULL, 3000))
+
+ggsave(paste0(outdir, "BarPlot/", 'BP_raw_cellCount_control', '.png'), plot = bp_rawCellCount_control, device = 'png', path = NULL,
+       scale = 1, width = 15, height = 20, units = 'cm', dpi = 300)
+ggsave(paste0(outdir, "BarPlot/", 'BP_raw_cellCount_treated', '.png'), plot = bp_rawCellCount_treated, device = 'png', path = NULL,
+       scale = 1, width = 15, height = 20, units = 'cm', dpi = 300)
+
+bp_control_raw_cycle <- plot_grid(bp_phasesClust_control + theme(legend.position = "none"), bp_rawCellCount_control + theme(axis.title.y = element_blank(), axis.text.y = element_blank()) + ggtitle(" "), rel_widths = c(2,1))
+bp_treated_raw_cycle <- plot_grid(bp_phasesClust_treated + theme(legend.position = "none"), bp_rawCellCount_treated + theme(axis.title.y = element_blank(), axis.text.y = element_blank()) + ggtitle(" "), rel_widths = c(2,1))
+
+bp_all_grid_noleg <- plot_grid(bp_control_raw_cycle, bp_treated_raw_cycle, nrow = 2)
+bp_all_grid_leg <- plot_grid(bp_all_grid_noleg, cellCycle_legend, rel_widths = c(3,0.4))
+
+ggsave(paste0(outdir, "BarPlot/", 'BP_grid_cellCycle_cellCount', '.png'), plot = bp_treated_raw_cycle, device = 'png', path = NULL,
+       scale = 1, width = 20, height = 20, units = 'cm', dpi = 300)
+
+# BP proportion treatment per cluster
+clustersampleName <- ddply(seurat@meta.data,~FinalCluster + condition,nrow)
+propExpect <- table(seurat@meta.data$condition)/length(seurat@meta.data$condition)[]
+
+enrich_clust <- getEnrichPopClust(hspc.combined = seurat, Xname = "Control", Yname = "Treated", colorX = colorTreatment[1], colorY = colorTreatment[2], metaCol = "condition", clustCol = "FinalCluster")
+
+clustersampleName$condition <- factor(clustersampleName$condition, levels = c("Treated", "Control"))
+
+bp_treatmentClust <- ggplot(data.frame(clustersampleName), aes(fill = condition,y = V1, x=FinalCluster)) +
+  geom_bar( stat="identity", position="fill")+
+  scale_fill_manual( values= rev(colorTreatment))+
+  scale_y_continuous(name = "Sample (%)", labels = c(0,25,50,75,100))+
+  ylab(label = "")+xlab(label = "") + coord_flip()+
+  theme(legend.title=element_blank(), axis.text.y = element_text(colour = enrich_clust[,"color"])) +
+  geom_hline(yintercept = propExpect[1])
+
+ggsave(paste0(outdir, "BarPlot/",'BP_treatment', '.png'), plot = bp_treatmentClust, device = 'png', path = NULL,
+       scale = 1, width = 15, height = 20, units = 'cm', dpi = 300)
+
+# BP cell number distribution per cluster 
+CellPerClust <- ddply(seurat@meta.data,~FinalCluster, nrow)
+
+bp_rawTreatmentClust <- ggplot(data.frame(CellPerClust), aes(y = V1, x=FinalCluster)) +
+  geom_bar( stat="identity") + coord_flip() + ylab("nCells")
+
+ggsave(paste0(outdir, "BarPlot/",'BP_raw_treatment', '.png'), plot = bp_rawTreatmentClust, device = 'png', path = NULL,
+       scale = 1, width = 15, height = 20, units = 'cm', dpi = 300)
+
+# Arranging BP grid
+treatment_legend <- get_legend(bp_treatmentClust)
+
+bp_grid <- plot_grid(bp_treatmentClust + theme(legend.position = "none"), 
+                     bp_rawTreatmentClust + theme(axis.text.y = element_blank(), axis.title.y = element_blank()), 
+                     treatment_legend, rel_widths = c(2,1,0.5), nrow = 1)
+
+ggsave(paste0(outdir, "BarPlot/",'BP_grid_treatment', '.png'), plot = bp_grid, device = 'png', path = NULL,
+       scale = 1, width = 15, height = 20, units = 'cm', dpi = 300)
+
+
+#-------------------------------------------------#
+#      Create signature assay
+#-------------------------------------------------#
+
+for(sig in signature_names){
+  print(sig)
+  print(head(signature_list_mouse[[sig]]))
+  seurat <- AddModuleScore(seurat, features = list(signature_list_mouse[[sig]]), name = sig)
+  colnames(seurat@meta.data)[which(colnames(seurat@meta.data) == paste0(sig, 1))] <- sig
+}
+
+dir.create(paste0(outdir, "Signature/"), recursive = T)
+
+seurat[["Signature"]] <- CreateAssayObject(data = t(as.matrix(seurat@meta.data[,do.call("rbind", signature_names)[,1]])))
+
+DefaultAssay(seurat) <- "Signature"
+
+##------------------------------------------------
+##   Signature analysis
+##------------------------------------------------
+
+
+
+for(sig in signature_names){
+  print(sig)
+  Idents(seurat) <- "FinalCluster"
+  sig_assay <- gsub(sig, pattern = "_", replacement = "-")
+  print(sig_assay)
+  
+  markers_sig <- FindAllMarkers(seurat, logfc.threshold = 0, min.pct = 0, features = sig_assay)
+  
+  score_clust <- list()
+  for(cluster in levels(seurat$FinalCluster)){
+    cells_clust <- rownames(seurat@meta.data[seurat@meta.data$FinalCluster %in% cluster,])
+    cells_other <- rownames(seurat@meta.data[!seurat@meta.data$FinalCluster %in% cluster,])
+    score_clust[[cluster]] <- featureDiff(seurat = seurat, cells.1 = cells_clust, cells.2 = cells_other, feature = sig_assay)
+  }
+  summary_score_clust <- do.call("rbind", score_clust)
+  
+  rownames(markers_sig) <- markers_sig$cluster
+  markers_sig$Diff <- summary_score_clust[rownames(markers_sig),1]
+  
+  # Find population in each cluster were the signature is overexpressed
+  Idents(seurat) <- "Treatment_clust"
+  # print()
+  summary_marker_split <- list()
+  for(cluster in levels(seurat$FinalCluster)){
+    print(paste0("Starting treatment effect on ", cluster))
+    summary_marker_split[[paste(cluster, "Treated_vs_Control", sep = "_")]] <- FindMarkers(object = seurat, 
+                                                                                               ident.1 = paste(cluster, "Treated", sep = "_"),
+                                                                                               ident.2 = paste(cluster, "Control", sep = "_"),
+                                                                                               logfc.threshold = 0, min.pct = 0, features = sig_assay)
+    
+    summary_marker_split[[paste(cluster, "Treated_vs_Control", sep = "_")]]$Cluster <- cluster
+    summary_marker_split[[paste(cluster, "Treated_vs_Control", sep = "_")]]$Comparison <- "Treated_vs_Control"
+  }
+  
+  summary_marker_split.df <- do.call("rbind", summary_marker_split)
+  
+  # Find population in each cluster were the signature is overexpressed
+  score_clustTreat <- list()
+  for(cluster in levels(seurat$FinalCluster)){
+    cells_clust_treated <- rownames(seurat@meta.data[seurat@meta.data$Treatment_clust %in% paste(cluster, "Treated", sep = "_"),])
+    cells_clust_control <- rownames(seurat@meta.data[seurat@meta.data$Treatment_clust %in% paste(cluster, "Control", sep = "_"),])
+    score_clustTreat[[paste(cluster, "Treated_vs_Control", sep = "_")]] <- featureDiff(seurat = seurat, cells.1 = cells_clust_treated, cells.2 = cells_clust_control, feature = sig_assay)
+  }
+  summary_score_clustTreat <- do.call("rbind", score_clustTreat)
+  
+  summary_marker_split.df$Diff <- summary_score_clustTreat[rownames(summary_marker_split.df),1]
+  
+  # Violin plot of the signature
+  vln_sig <- VlnPlot(seurat, features = sig_assay, pt.size = 0, cols = colorCluster, group.by = "FinalCluster")
+  
+  vln_sig_split <- VlnPlot(seurat, features = sig_assay, group.by = "FinalCluster", split.by = "condition", cols = colorTreatment, pt.size = 0)
+  
+  #Saving data
+  write.table(markers_sig, file = paste0(outdir, "Signature/", "Table_", sig, ".tsv"), quote = F, row.names = F, col.names = T, sep = "\t")
+  
+  write.table(summary_marker_split.df, file = paste0(outdir, "Signature/", "Table_", sig, "_Treatment", ".tsv"), quote = F, row.names = F, col.names = T, sep = "\t")
+  
+  ggsave(paste0(outdir, "Signature/", 'Vln_', sig, '.png'), plot = vln_sig, device = 'png', path = NULL,
+         scale = 1, width = 15, height = 10, units = 'cm', dpi = 300)
+  
+  ggsave(paste0(outdir, "Signature/", 'Vln_', sig,  '_Treatment', '.png'), plot = vln_sig_split, device = 'png', path = NULL,
+         scale = 1, width = 15, height = 10, units = 'cm', dpi = 300)
+  
+}
+
+Idents(seurat) <- "FinalCluster"
+DefaultAssay(seurat) <- DefAssay
+
+saveRDS(seurat, paste0(outdir, "seurat_annotated", ".rds"))
diff --git a/R_src/signacSmpCL.R b/R_src/signacSmpCL.R
new file mode 100644
index 0000000000000000000000000000000000000000..7fa4d978d9c7654481fdb4f1a091f005f1045295
--- /dev/null
+++ b/R_src/signacSmpCL.R
@@ -0,0 +1,433 @@
+##------------------------------------------------------------------------------
+## L. Herault
+##------------------------------------------------------------------------------
+
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+library(Signac)
+library(Seurat)
+library(GenomeInfoDb)
+library(EnsDb.Mmusculus.v79)
+library(ggplot2)
+library(patchwork)
+library(getopt)
+
+set.seed(1234)
+
+## -----------------------------------------------------------------------------
+## Command line args
+## -----------------------------------------------------------------------------
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'inputDir',  'i', 1, "character", "REQUIRED : Celranger data raw dir with filtered_peak_bc_matrix.h5,singlecell.csv and fragments.tsv.gz files",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)',
+  "num_dim",          'n', 1, "numeric", "first n dimension of the SVD to use for the reclustering",
+  "firstDimLSI",   "f",0, "logical", "Use first LSI dim, default is do not use as it is often strongly correlated to sequencing depht",
+  "logfc_threshold", "d", 1, "numeric", "logfc threshold for finding cluster markers (1 cluster vs all deg) 0.25 by default",
+  "sampleInfo",   "s", 1, "character", "sample information that will be added to pData with the following format: age=2_months,runDate=10_12_2017..",
+  "peakRegionFragmentsMadUp", "u", 1, "numeric", "cell with peaks in region fragment upper than u mad will be discarded (default n = 2)",
+  "peakRegionFragmentsMadLow", "l", 1, "numeric", "cell with peaks in region fragment lower than l mad will be discarded (default n = 2)",
+  "pctReadsInPeaksLow", "r", 1, "numeric", "cells with percentage reads in peaks lower than r % will be discarded (default = 15)",
+  "blackListRatio", "b", 1, "numeric", "cells with a blacklist ratio higher than b will be discarded (default b = 0.05)",
+  "nucleosomeSignal", "c", 1 , "numeric", "cells with a nucleosome signal higher tban n will be discarded (default n = 4)",
+  "tssEnrichment", "t", 1, "numeric","cells with a tssEnrichment lower than t will be discarded (default t = 2)"
+  
+), byrow=TRUE, ncol=5);
+
+
+
+opt = getopt(spec)
+
+
+# if help was asked, print a friendly message
+# and exit with a non-zero error code
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputDir)) {
+  cat("Perform signac sample processing, quality control, genes activity, peak calling, DE peaks")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+
+if (is.null(opt$logfc_threshold)) {
+  opt$logfc_threshold = 0.25
+} 
+
+if (is.null(opt$num_dim)) {
+  opt$num_dim = 40
+} 
+
+if (is.null(opt$firstDimLSI)) {
+  opt$firstDimLSI <- FALSE
+}
+
+if (is.null(opt$peakRegionFragmentsMadUp)) {
+  opt$peakRegionFragmentsMadUp <- 2
+}
+
+if (is.null(opt$peakRegionFragmentsMadLow)) {
+  opt$peakRegionFragmentsMadLow <- 2
+}
+
+if (is.null(opt$pctReadsInPeaksLow)) {
+  opt$pctReadsInPeaksLow <- 15
+}
+
+if (is.null(opt$blackListRatio)) {
+  opt$blackListRatio <- 0.05
+}
+
+if (is.null(opt$nucleosomeSignal)) {
+  opt$nucleosomeSignal <- 4
+}
+
+if (is.null(opt$tssEnrichment)) {
+  opt$tssEnrichment <- 2
+}
+
+
+print(opt)
+
+
+
+## ----load------------------------------------------------------------------------------------------------------
+counts <- Read10X_h5(filename =paste0(opt$inputDir,"/filtered_peak_bc_matrix.h5"))
+metadata <- read.csv(
+  file = paste0(opt$inputDir,"/singlecell.csv"),
+  header = TRUE,
+  row.names = 1
+)
+
+chrom_assay <- CreateChromatinAssay(
+  counts = counts,
+  sep = c(":", "-"),
+  genome = 'mm10',
+  fragments = paste0(opt$inputDir,"/fragments.tsv.gz"),
+  min.cells = 10,
+  min.features = 200
+)
+
+prom <- CreateSeuratObject(
+  counts = chrom_assay,
+  assay = "peaks",
+  meta.data = metadata
+)
+
+#Add sample infos
+
+print(opt$sampleInfo)
+sampleInfos <- strsplit(opt$sampleInfo,split=",")[[1]]
+for (i in sampleInfos) {
+  print(i)
+  info <- strsplit(i,split = "=")[[1]][1]
+  print(info)
+  value <- strsplit(i,split = "=")[[1]][2]
+  print(value)
+  prom@meta.data[,info] <- value
+}
+
+## --------------------------------------------------------------------------------------------------------------
+prom
+
+
+## --------------------------------------------------------------------------------------------------------------
+prom[["peaks"]]
+
+
+## --------------------------------------------------------------------------------------------------------------
+gr <- granges(prom)
+
+df <- data.frame(seqnames=seqnames(gr),
+                 starts=start(gr)-1,
+                 ends=end(gr),
+                 names=c(rep(".", length(gr))),
+                 scores=c(rep(".", length(gr))),
+                 strands=strand(gr))
+
+write.table(df, file=paste0(opt$outdir,"/10X_peaks.bed"), quote=F, sep="\t", row.names=F, col.names=F)
+## --------------------------------------------------------------------------------------------------------------
+# extract gene annotations from EnsDb
+annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Mmusculus.v79)
+
+# change to UCSC style since the data was mapped to hg19
+seqlevelsStyle(annotations) <- 'UCSC'
+genome(annotations) <- "mm10"
+
+# add the gene information to the object
+Annotation(prom) <- annotations
+
+
+## --------------------------------------------------------------------------------------------------------------
+# compute nucleosome signal score per cell
+prom <- NucleosomeSignal(object = prom)
+
+# compute TSS enrichment score per cell
+prom <- TSSEnrichment(object = prom, fast = FALSE)
+
+# add blacklist ratio and fraction of reads in peaks
+prom$pct_reads_in_peaks <- prom$peak_region_fragments / prom$passed_filters * 100
+prom$blacklist_ratio <- prom$blacklist_region_fragments / prom$peak_region_fragments
+
+
+## --------------------------------------------------------------------------------------------------------------
+prom$high.tss <- ifelse(prom$TSS.enrichment > 2, 'High', 'Low')
+png(paste0(opt$outdir,"/TSSplot.png"),units = "in",height = 6,width=5,res = 300)
+TSSPlot(prom, group.by = 'high.tss') + NoLegend()
+dev.off()
+
+## --------------------------------------------------------------------------------------------------------------
+prom$nucleosome_group <- ifelse(prom$nucleosome_signal > 2, 'NS > 2', 'NS < 2')
+FragmentHistogram(object = prom,region = "chr1-1-100000000",group.by = 'nucleosome_group')
+
+
+## ----fig.height=5,fig.width=14---------------------------------------------------------------------------------
+png(paste0(opt$outdir,"/qcMetrics.png"),units = "in",height = 5,width=14,res = 300)
+VlnPlot(
+  object = prom,
+  features = c('pct_reads_in_peaks', 'peak_region_fragments',
+               'TSS.enrichment', 'blacklist_ratio', 'nucleosome_signal'),
+  pt.size = 0.001,
+  ncol = 5
+)
+dev.off()
+
+## -------------------------------------------------------------------------------------
+minCountThreshold <- median(prom$peak_region_fragments) -opt$peakRegionFragmentsMadLow*mad(prom$peak_region_fragments)
+maxCountThreshold <- median(prom$peak_region_fragments)  +opt$peakRegionFragmentsMadUp*mad(prom$peak_region_fragments)
+  
+png(paste0(opt$outdir,"/peak_region_fragments_raw.png"),units = "in",height = 5,width=6,res = 300)
+VlnPlot(
+  object = prom,
+  features = c('peak_region_fragments'),
+  pt.size = 0.001,
+) +   geom_hline(yintercept = minCountThreshold) +
+  geom_hline(yintercept = maxCountThreshold)
+dev.off()
+
+
+## --------------------------------------------------------------------------------------------------------------
+prom <- subset(
+  x = prom,
+  subset = peak_region_fragments > median(prom$peak_region_fragments) -opt$peakRegionFragmentsMadLow*mad(prom$peak_region_fragments) &
+    peak_region_fragments < median(prom$peak_region_fragments) +opt$peakRegionFragmentsMadUp*mad(prom$peak_region_fragments) &
+    pct_reads_in_peaks > opt$pctReadsInPeaksLow &
+    blacklist_ratio < opt$blackListRatio &
+    nucleosome_signal < opt$nucleosomeSignal &
+    TSS.enrichment > opt$tssEnrichment
+)
+prom
+
+## -------------------------------------------------------------------------------------
+
+png(paste0(opt$outdir,"/peak_region_fragments_filtered.png"),units = "in",height = 5,width=6,res = 300)
+VlnPlot(
+  object = prom,
+  features = c('peak_region_fragments'),
+  pt.size = 0.001,
+)
+dev.off()
+
+
+
+## ----fig.height=5,fig.width=14---------------------------------------------------------------------------------
+png(paste0(opt$outdir,"/qcMetricsFiltered.png"),units = "in",height = 5,width=14,res = 300)
+VlnPlot(
+  object = prom,
+  features = c('pct_reads_in_peaks', 'peak_region_fragments',
+               'TSS.enrichment', 'blacklist_ratio', 'nucleosome_signal'),
+  pt.size = 0.001,
+  ncol = 5
+)
+dev.off()
+
+
+## --------------------------------------------------------------------------------------------------------------
+prom <- RunTFIDF(prom)
+prom <- FindTopFeatures(prom, min.cutoff = 'q75')
+prom <- RunSVD(prom, n =opt$num_dim)
+
+
+## --------------------------------------------------------------------------------------------------------------
+png(paste0(opt$outdir,"/DepthCor.png"),units = "in",height = 6,width=6,res = 300)
+DepthCor(prom)
+dev.off()
+
+## --------------------------------------------------------------------------------------------------------------
+if (opt$firstDimLSI) {
+  dims = c(1:30)
+} else {
+  dims = c(2:30)
+}
+
+prom <- RunUMAP(object = prom, reduction = 'lsi', dims = dims)
+prom <- FindNeighbors(object = prom, reduction = 'lsi', dims = dims)
+prom <- FindClusters(object = prom, verbose = FALSE, algorithm = 3)
+png(paste0(opt$outdir,"/UMAP_clust.png"),units = "in",height = 6,width=6,res = 300)
+DimPlot(object = prom, label = TRUE) + NoLegend()
+dev.off()
+
+
+## --------------------------------------------------------------------------------------------------------------
+gene.activities <- GeneActivity(prom)
+
+
+## --------------------------------------------------------------------------------------------------------------
+# add the gene activity matrix to the Seurat object as a new assay and normalize it
+prom[['activities']] <- CreateAssayObject(counts = gene.activities)
+prom <- NormalizeData(
+  object = prom,
+  assay = 'activities',
+  normalization.method = 'LogNormalize',
+  scale.factor = median(prom$nCount_activities)
+)
+
+## --------------------------------------------------------------------------------------------------------------
+DefaultAssay(prom) <- 'activities'
+markers <- FindAllMarkers(object = prom,only.pos = T)
+markers <- markers[order(markers$p_val_adj),]
+head(markers)
+markers <- markers[markers$p_val_adj<0.05,]
+
+write.table(markers,paste0(opt$outdir,"/geneActMarkers.tsv"),quote = F,sep = '\t',row.names = F)
+
+## ----fig.height=8,fig.width=6----------------------------------------------------------------------------------
+png(paste0(opt$outdir,"/FeaturePlot.png"),units = "in",height = 8,width=6,res = 300)
+FeaturePlot(
+  object = prom,
+  features = head(markers$gene),
+  pt.size = 0.1,
+  max.cutoff = 'q95',
+)
+dev.off()
+
+
+## --------------------------------------------------------------------------------------------------------------
+DefaultAssay(prom) <- 'peaks'
+
+da_peaks <- FindAllMarkers(
+  object = prom,
+
+  min.pct = 0.2,
+  test.use = 'LR',
+  latent.vars = 'peak_region_fragments'
+)
+da_peaks <- da_peaks[da_peaks$p_val_adj < 0.05,]
+head(da_peaks)
+write.table(da_peaks,paste0(opt$outdir,"/peakMarkers.tsv"),quote = F,sep = '\t',row.names = F)
+
+
+## --------------------------------------------------------------------------------------------------------------
+
+#da_peaks <- da_peaks[da_peaks$pct.1>0.4,]
+
+closest_genes_clusterPeaks <- ClosestFeature(prom, regions = rownames(da_peaks))
+
+
+head(closest_genes_clusterPeaks)
+
+
+## ----fig.height=8,fig.width=8----------------------------------------------------------------------------------
+
+plot1 <- VlnPlot(
+  object = prom,
+  features = rownames(da_peaks)[c(1,10)],
+  pt.size = 0.1,ncol = 1,
+)
+plot2 <- FeaturePlot(
+  object = prom,
+  features =rownames(da_peaks)[c(1,10)],
+  pt.size = 0.1,ncol = 1,
+) 
+png(paste0(opt$outdir,"/peaksPlot.png"),units = "in",height = 8,width=8,res = 300)
+plot1 | plot2
+dev.off()
+
+
+
+## --------------------------------------------------------------------------------------------------------------
+#closest_genes_clusterPeaks[closest_genes_clusterPeaks$query_region %in% rownames(da_peaks[order(da_peaks$avg_logFC,decreasing = T),])[c(1,10)],"gene_name"]
+#To do add to peak markers table
+
+
+## --------------------------------------------------------------------------------------------------------------
+
+
+## --------------------------------------------------------------------------------------------------------------
+# set plotting order
+
+png(paste0(opt$outdir,"/CoveragePlot.png"),units = "in",height = 8,width=8,res = 300)
+CoveragePlot(
+  object = prom,
+  region = rownames(da_peaks)[1],
+  extend.upstream = 40000,
+  extend.downstream = 20000
+)
+dev.off()
+
+
+# # Trying macs2 call peaks
+# promMacs <- prom
+# 
+# peaksMacs <- CallPeaks(
+#   object = prom,
+#   macs2.path = "/shared/home/lherault/bin/miniconda3/envs/signac_env/bin/macs2" #TO DO update env and Ã¢s macc2 path through command line
+# )
+# 
+# 
+# gr <- peaksMacs
+# df <- data.frame(seqnames=seqnames(gr),
+#                  starts=start(gr)-1,
+#                  ends=end(gr),
+#                  names=c(rep(".", length(gr))),
+#                  scores=c(rep(".", length(gr))),
+#                  strands=strand(gr))
+# 
+# write.table(df, file=paste0(opt$outdir,"/macs2_peaks.bed"), quote=F, sep="\t", row.names=F, col.names=F)
+# 
+# 
+# 
+# # call peaks for each cell type using MACS2
+# DefaultAssay(pbmc) <- "cellranger"
+# peaks <- CallPeaks(
+#   object = pbmc,
+#   group.by = "celltype",
+#   additional.args = "--max-gap 50"
+# )
+# 
+# peaks <- keepStandardChromosomes(peaks, pruning.mode = "coarse")
+# 
+# # remove peaks in blacklist regions
+# peaks <- subsetByOverlaps(x = peaks, ranges = blacklist_hg38_unified, invert = TRUE)
+# 
+# # quantify peaks
+# peakcounts <- FeatureMatrix(
+#   fragments = Fragments(pbmc),
+#   features = peaks,
+#   cells = colnames(pbmc)
+# )
+# 
+# pbmc[["ATAC"]] <- CreateChromatinAssay(
+#   counts = peakcounts,
+#   min.cells = 5,
+#   genome = "hg38",
+#   fragments = fragments,
+#   annotation = annotations
+# )
+# 
+# DefaultAssay(pbmc) <- "ATAC"
+# 
+# pbmc <- FindTopFeatures(pbmc, min.cutoff = 10)
+# pbmc <- RunTFIDF(pbmc)
+# pbmc <- RunSVD(pbmc)
+# pbmc <- RunUMAP(pbmc, reduction = "lsi", dims = 2:40, reduction.name = "umap.atac")
+# pbmc <- FindNeighbors(pbmc, reduction = "lsi", dims = 2:40)
+# pbmc <- FindClusters(pbmc, algorithm = 3)
+
+
+
+## --------------------------------------------------------------------------------------------------------------
+saveRDS(prom,paste0(opt$outdir,"/atac.rds"))
+
diff --git a/R_src/signac_integration.R b/R_src/signac_integration.R
new file mode 100644
index 0000000000000000000000000000000000000000..092514fc870360454655914bd8abd84da69c3aac
--- /dev/null
+++ b/R_src/signac_integration.R
@@ -0,0 +1,272 @@
+##------------------------------------------------------------------------------
+## L. Herault
+##------------------------------------------------------------------------------
+
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+
+suppressMessages(library(Signac))
+suppressMessages(library(Seurat))
+suppressMessages(library(getopt))
+suppressMessages(library(gridExtra))
+# suppressMessages(library(gProfileR))
+# suppressMessages(library(RColorBrewer))
+# suppressMessages(library(readxl))
+# suppressMessages(library(stringr))
+suppressMessages(library(scales))
+# suppressMessages(library(cowplot))
+# suppressMessages(library(sctransform))
+# suppressMessages(library(stringr))
+# suppressMessages(library(plyr))
+suppressMessages(library(grid))
+
+suppressMessages(library(S4Vectors))
+suppressMessages(library(patchwork))
+set.seed(2021)
+
+source("R_src/getSigPerCells.R")
+source("R_src/Enrichment.R")
+source("R_src/funForSeurat.R")
+
+
+# Seurat 3 integration workflow
+
+## -----------------------------------------------------------------------------
+## Command line args
+## -----------------------------------------------------------------------------
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'inputFiles',  'i', 1, "character", "REQUIRED: 10X dataset paths prepared as atac.rds object (.RDS generated by signacSmpCL.R) separated by +",
+  'minCutOff',   'm', 1, "character" , "minimum Cut Off on count for peaks (default 50)",
+  'subsetInterPeaks', 's',1, "character", "REQUIRED:sizie of subset intersection peaks used for integration (default 10000)",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)',
+  "num_dim_CCA", 'q',1,"numeric", "First n dimensions of CCA to use for FindIntegrationAnchors Seurat function (15 by default)",
+  "num_dim_weight", 'w',1,"numeric", "First n dimensions to use for IntegrateData Seurat function (15 by default)",
+  "num_dim",      'n',1,"numeric", "First n dimensions of PCA to use for clustering, UMAP and TSNE (15 by default)",
+  "num_dim_integrated",'N',1,"numeric", "Number of PCA dimension computed to analyse integrated data (40 by default)",
+  "cores",        'c',1, "numeric", "Number of cores to use for ordering (for differencially expressed gene between clusters test)",
+  "resolution",    'r',1, "numeric", "resolution for hspc.combined clustering",
+  "correction",      "b",1,"character", "Covariable to use as blocking factor (eg one or several columns of pData: betch, cell cycle phases... separated by +)",
+  "logfc_threshold", "l", 1, "numeric", "logfc threshold for finding cluster markers (1 cluster vs all deg) 0.25 by default",
+  "rodriguezSig", "k", 1, "character", "path for xls file of Rodriguez result",
+  "norm_method", "z",1, "character", "normalization method, logNorm (by default) or sctransform",
+  "reusePca", "p", 0, "character","re use pca calculated before when caculating anchor weights for each dataset default to FALSE (permit to correct for cell cycle before integration)"
+), byrow=TRUE, ncol=5);
+
+opt = getopt(spec)
+
+## For test
+setwd("/shared/projects/scRNA_HSPC_Aging/GMP_PRARA/")
+opt <- list()
+opt$inputFiles <- "output/ATAC/smp/RA/atac.rds+output/ATAC/smp/Ctrl/atac.rds"
+opt$subsetInterPeaks <- 10000
+opt$minCutOff <- 50
+opt$outdir <- "output/ATAC/integration/"
+# if help was asked, print a friendly message
+# and exit with a non-zero error code
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputFiles)) {
+  cat("Perform Seurat 3 integration workflow, then cluster the cell with seurat 3 at different resolutions")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+#set default arguments values
+
+if (is.null(opt$resolution)) {
+  opt$resolution <- 0.6
+}
+
+if (is.null(opt$logfc_threshold)) {
+  opt$logfc_threshold <- 0.25
+}
+
+if (is.null(opt$num_dim_CCA)) {
+  opt$num_dim_CCA <- 15
+}
+
+if (is.null(opt$num_dim_weight)) {
+  opt$num_dim_weight <- 15
+}
+
+if (is.null(opt$num_dim)) {
+  opt$num_dim <- 15
+}
+
+if (is.null(opt$num_dim_integrated)) {
+  opt$num_dim_integrated <- 40
+}
+
+if (is.null(opt$norm_method)) {
+  opt$norm_method <- "logNorm"
+}
+
+if (is.null(opt$reusePca)) {
+  opt$reusePca <- FALSE
+}
+
+#blank <- grid.rect(gp=gpar(col="white"))
+dir.create(opt$outdir,recursive = T,showWarnings = F)
+
+print(opt)
+
+
+############################################################################################################################################
+## QC preprocessing already done in the workflow
+## Get common features across the datasets 
+
+# load object
+
+listFile <- strsplit(opt$inputFiles,split = "\\+")[[1]]
+
+smpList <- list()
+#load dataset 
+
+for (i in 1:length(x = listFile)) {
+  #For testing, in final workflow sampleName will be incorporated in metadata with the loading of cell ranger matrix
+  sampleName <- strsplit(listFile[i],split = "/")[[1]][4]
+  smpList[[i]] <- readRDS(listFile[i])
+  #smpList[[i]]@meta.data$sampleName <- sampleName
+  smpList[[i]] <- RenameCells(smpList[[i]],add.cell.id = sampleName)
+  
+}
+
+
+# find peaks that intersect in both datasets
+intersecting.regions <- findOverlaps(query = smpList[[1]], subject = smpList[[2]])
+
+# find the coordinates of peaks in the first sample that intersect peaks in the second sample
+intersections.2 <- unique(queryHits(intersecting.regions))
+
+# choose a subset of intersecting peaks
+## TO DO 
+peaks.use <- sort(granges(smpList[[1]])[sample(intersections.2, size = opt$subsetInterPeaks, replace = FALSE)])
+
+# count fragments per cell overlapping the set of peaks in the secon sample data
+peaks <- FeatureMatrix(
+  fragments = Fragments(smpList[[2]]),
+  features = peaks.use,
+  cells = colnames(smpList[[2]])
+)
+
+# create a new assay and add it to the second sample dataset
+smpList[[2]][['firstSamplePeaks']] <- CreateChromatinAssay(
+  counts = peaks,
+  min.cells = 1,
+  ranges = peaks.use,
+  genome = 'mm10'
+)
+
+DefaultAssay(smpList[[2]]) <- 'firstSamplePeaks'
+smpList[[2]] <- RunTFIDF(smpList[[2]])  ##normalisation
+
+
+#############################################################################################################################################
+### Integration
+## Look at the data without integration first
+# create a new assay in the first sample-ATAC-seq dataset containing the common peaks
+peaknames <- GRangesToString(grange = peaks.use)
+
+smpList[[1]][['firstSamplePeaks']] <- CreateChromatinAssay(
+  counts <- GetAssayData(smpList[[1]], assay = "peaks", slot = "counts")[peaknames, ],
+  ranges = peaks.use,
+  genome = "mm10"
+)
+
+# run TF-IDF for the new assay
+DefaultAssay(smpList[[1]]) <- "firstSamplePeaks"
+smpList[[1]] <- RunTFIDF(smpList[[1]])
+
+unintegrated <- merge(smpList[[1]], smpList[[2]])
+DefaultAssay(unintegrated) <- "firstSamplePeaks"
+unintegrated <- RunTFIDF(unintegrated)
+unintegrated <- FindTopFeatures(unintegrated, min.cutoff = opt$minCutOff)
+unintegrated <- RunSVD(unintegrated)
+
+png(paste0(opt$outdir,"/unintegratedDepthCor.png"),units = "in",height = 8,width=4,res = 300)
+DepthCor(unintegrated)
+dev.off()
+# first lsi dim not used ?
+unintegrated <- RunUMAP(unintegrated, reduction = 'lsi', dims = 2:30)
+
+p1 <- DimPlot(unintegrated, group.by = 'sampleName', pt.size = 0.1) + ggplot2::ggtitle("Unintegrated")
+
+## Integration with anchors
+# find integration anchors between 10x and sci-ATAC
+anchors <- FindIntegrationAnchors(
+  object.list = list(smpList[[2]], smpList[[1]]),
+  anchor.features = rownames(smpList[[2]]),
+  assay = c('firstSamplePeaks', 'firstSamplePeaks'),
+  k.filter = NA
+)
+
+# integrate data and create a new merged object
+integrated <- IntegrateData(
+  anchorset = anchors,
+  weight.reduction = list(smpList[[2]][['lsi']],smpList[[1]][['lsi']]),
+  dims = 2:30,  # first dim discarded
+  preserve.order = TRUE    # the larger object has been put first sample and will be the reference
+)
+
+# we now have a "corrected" TF-IDF matrix, and can run LSI again on this corrected matrix
+integrated <- RunSVD(
+  object = integrated,
+  n = 30,
+  reduction.name = 'integratedLSI'
+)
+
+png(paste0(opt$outdir,"/DepthCor.png"),units = "in",height = 6,width=6,res = 300)
+DepthCor(integrated,reduction = "integratedLSI",assay = "firstSamplePeaks")
+dev.off()
+
+integrated <- RunUMAP(
+  object = integrated,
+  dims = 2:30,
+  reduction = 'integratedLSI'
+)
+
+png(paste0(opt$outdir,"/UmapUnintegratedVSIntegrated.png"),units = "in",height = 8,width=4,res = 300)
+p2 <- DimPlot(integrated, group.by = 'sampleName', pt.size = 0.1) + ggplot2::ggtitle("Integrated")
+grid.arrange(p1,p2)
+dev.off()
+
+############################################################################################################################################
+### Clustering on lsi integrated space
+integrated <- FindNeighbors(object = integrated, reduction = 'integratedLSI', dims =  2:30)
+integrated <- FindClusters(object = integrated, verbose = FALSE, algorithm = 3)
+
+png(paste0(opt$outdir,"/UmapIntegratedClust.png"),units = "in",height = 6,width=6,res = 300)
+DimPlot(object = integrated, label = TRUE) + NoLegend()
+dev.off()
+
+
+
+#####################################################################################################################
+### trying harmony
+
+# library(harmony)
+# 
+# hm.integrated <- RunHarmony(
+#   object = unintegrated,
+#   group.by.vars = 'condition',
+#   reduction = 'lsi',
+#   assay.use = 'firstSamplePeaks',
+#   project.dim = FALSE
+# )
+# 
+# # re-compute the UMAP using corrected LSI embeddings
+# DepthCor(hm.integrated)
+# hm.integrated <- RunUMAP(hm.integrated, dims = 2:30, reduction = 'harmony')
+# p5 <- DimPlot(hm.integrated, group.by = 'condition', pt.size = 0.1) + ggplot2::ggtitle("Harmony integration")
+# p1 + p5
+
+saveRDS(integrated,paste(opt$outdir,"/atacCombined.rds",sep =""))
+
+
+
+
+
+
diff --git a/R_src/streamSeuratCL.R b/R_src/streamSeuratCL.R
new file mode 100644
index 0000000000000000000000000000000000000000..0739876c0f7213d526f070b1cefa7ecccb97acbb
--- /dev/null
+++ b/R_src/streamSeuratCL.R
@@ -0,0 +1,226 @@
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+
+suppressMessages(library(Seurat))
+suppressMessages(library(ggplot2))
+suppressMessages(library(scales))
+suppressMessages(library(getopt))
+suppressMessages(library(plyr))
+suppressMessages(library(RColorBrewer))
+
+# Do hypergeometric test of to population to find if ine proportion is significantly increased
+getEnrichAge <- function(hspc.combined,clustCol ='clusterName',metaCol = "age") {
+  
+  table <- table(hspc.combined@meta.data[,metaCol],hspc.combined@meta.data[,clustCol])
+  
+  #Remove null column in case of reclustering has been made
+  
+  table <- table[,as.vector(which(colSums(table)>0))]
+  
+  tablePercent <- prop.table(table,2)
+  
+  propExpect <- table(hspc.combined@meta.data[,metaCol])/length(hspc.combined@meta.data[,metaCol])
+  propExpectAge_1<- propExpect[[unique(hspc.combined@meta.data[,metaCol])[1]]]
+  propExpectAge_2<- propExpect[[unique(hspc.combined@meta.data[,metaCol])[2]]]
+  phyper <- rep(NA,length(colnames(table)))
+  enrich <- rep(NA,length(colnames(table)))
+  tablePercent <- rbind(tablePercent,enrich,phyper)
+  
+  
+  for (age in unique(hspc.combined@meta.data[,metaCol])) {
+    for (cluster in colnames(table)) {
+      if(tablePercent[age,cluster] > propExpect[[age]]) {
+        cells_pull_marked <- table[age,as.character(cluster)]
+        cells_pull <- as.numeric(colSums(table)[as.character(cluster)])
+        cells_marked_all <- rowSums(table)[age]
+        all_cells <- length(hspc.combined@meta.data[,metaCol])
+        
+        
+        
+        p.value <-  phyper(q=cells_pull_marked -1, 
+                           m=cells_marked_all,
+                           n=all_cells - cells_marked_all, k= cells_pull, lower.tail=FALSE)
+        
+        tablePercent["enrich",cluster] <- age
+        
+        tablePercent["phyper",cluster] <- p.value
+        
+      }
+    }
+  }
+  return(tablePercent)
+  
+}
+
+# Hypergeometric test and plotting the result with a bp function, use a getEnrichAge
+getEnrichPopClust <- function(hspc.combined, Xname, Yname, colorX, colorY, metaCol = "AGE", clustCol = "numclust"){
+  conditionEnrich <- getEnrichAge(hspc.combined = hspc.combined,clustCol = clustCol, metaCol = metaCol)
+  conditionEnrich <- as.data.frame(t(conditionEnrich))
+  conditionEnrich$color <- "black"
+  conditionEnrich[which(as.numeric(as.vector(conditionEnrich$phyper)) < 0.01 & conditionEnrich$enrich == Xname),"color"] <- colorX
+  conditionEnrich[which(as.numeric(as.vector(conditionEnrich$phyper)) < 0.01 & conditionEnrich$enrich == Yname),"color"] <- colorY
+  return(conditionEnrich)
+}
+
+
+## -----------------------------------------------------------------------------
+## Command line args
+## -----------------------------------------------------------------------------
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  "inputSeurat",  "i",1, "character", "input seurat object with cluster column names numclust",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)',
+  "inputStream", "s", 1, "character", "input stream output in csv format",
+  "colMetaData", "m", 1, "character", "List all the column from inputStream that you want to transfer (sep by +)",
+  "refPseudotime", "r", 1, "character", "Name of the reference pseudotime",
+  "treeShape", "t", 1, "character", "Level of the pseudotime tree, from the root to the leafs (sep by +)"
+), byrow=TRUE, ncol=5);
+
+
+
+opt = getopt(spec)
+
+# if help was asked, print a friendly message
+# and exit with a non-zero error code
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputSeurat)) {
+  cat("Create influence graph from regulon table")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+## ---------------------------------------------------------------------
+## Read inputs
+## ---------------------------------------------------------------------
+
+# opt$inputSeurat <- "/shared/projects/scRNA_HSPC_Aging/GMP_PRARA/output/RNA/Seurat4_integration/Analysis/seurat_annotated.rds"
+# opt$inputStream <- "/shared/projects/scRNA_HSPC_Aging/GMP_PRARA/output/RNA/streamAnalysis/stream_metadata.csv"
+# opt$outdir <- "/shared/projects/scRNA_HSPC_Aging/GMP_PRARA/output/RNA/streamAnalysisSeurat/"
+# opt$colMetaData <- "branch_id_alias+S0_pseudotime+S1_pseudotime+S2_pseudotime+S3_pseudotime+S4_pseudotime+S5_pseudotime"
+# opt$refPseudotime <- "S1_pseudotime"
+# opt$treeShape <- "(S1, S0)+(S2, S0)+(S3, S0)+(S4, S3)+(S5, S3)"
+
+print(opt$colMetaData)
+print(opt$refPseudotime)
+print(opt$treeShape)
+
+# Read seurat object
+seurat <- readRDS(opt$inputSeurat)
+
+stream <- read.table(opt$inputStream, header = T, sep = ",", row.names = 1)
+
+pseudotimeMetadata <- strsplit(opt$colMetaData, split = "\\+")[[1]]
+
+treeShape <- strsplit(opt$treeShape, split = "\\+")[[1]]
+
+outdir <- opt$outdir
+
+print(treeShape)
+
+# Define colors
+colorTreatment <- c("#664CFF", "#FF8000")
+colorCluster <- c("#E69F00", "#CC79A7", "#0072B2", "#009E73", "#D55E00", "#56B4E9")
+colorPhases <- c(brewer.pal(9,"RdPu"))[c(3,6,9)]
+colorBranches <- c("#332288", "#117733", "#DDCC77", "#AA4499", "#88CCEE")
+
+#----------------------------------------------------#
+#  Prepare the seurat object for pseudotime analysis
+#----------------------------------------------------#
+
+dir.create(outdir, recursive = T)
+
+DefaultAssay(seurat) <- "RNA"
+
+# Add all the metadata from stream output to seurat
+seurat@meta.data[,pseudotimeMetadata] <- stream[rownames(seurat@meta.data), pseudotimeMetadata]
+
+# Setup default pseudotime and make it 100 bigger
+seurat$Pseudotime <- seurat@meta.data[,opt$refPseudotime]
+seurat$Pseudotime_100 <- seurat@meta.data$Pseudotime*100
+
+# Rename branch to easier name
+seurat@meta.data$branch_id_alias <- gsub(pattern = "'", replacement = "", x = seurat@meta.data$branch_id_alias)
+seurat@meta.data$branch_id_alias <- factor(seurat@meta.data$branch_id_alias, levels = treeShape)
+
+print("Rename trajectory name")
+Idents(seurat) <- "branch_id_alias"
+new.traj.name <- LETTERS[1:length(treeShape)]
+
+names(new.traj.name) <- levels(seurat)
+
+seurat <- RenameIdents(seurat, new.traj.name)
+seurat@meta.data$BranchName <- Idents(seurat)
+
+print("Renaming done")
+Idents(seurat) <- "FinalCluster"
+
+# seurat@meta.data$FinalBranch <- FALSE
+# seurat@meta.data[which(seurat@meta.data$BranchName %in% c("D", "E", "B")), "FinalBranch"] <- TRUE
+
+#----------------------------------------------------#
+#  Pseudotime ploting
+#----------------------------------------------------#
+
+
+rescale_pseudotime <- rescale(seurat@meta.data[order(seurat$Pseudotime_100, decreasing = F), "Pseudotime_100"], to = c(0,1))
+UMAP_pseudotime_rescale <- FeaturePlot(seurat, features = "Pseudotime_100", pt.size = 0.4) + scale_color_viridis_c(option = "C", values = rescale_pseudotime)
+
+UMAP_branchName <- DimPlot(seurat, group.by = "BranchName", cols = colorBranches, pt.size = 0.4)
+
+ggsave(paste0(outdir, 'UMAP_Branches', '.png'), plot = UMAP_branchName, device = 'png', path = NULL, 
+       scale = 1, width = 20, height = 20, units = 'cm', dpi = 300)
+
+ggsave(paste0(outdir, 'UMAP_Pseudotime_rescale', '.png'), plot = UMAP_pseudotime_rescale, device = 'png', path = NULL, 
+       scale = 1, width = 20, height = 20, units = 'cm', dpi = 300)
+
+summary_branch_clust <- ddply(seurat@meta.data,~FinalCluster + BranchName + condition + phases, nrow)
+summary_branch_clust$condition <- factor(summary_branch_clust$condition, levels = c("Treated", "Control"))
+
+propExpect <- table(seurat@meta.data$condition)/length(seurat@meta.data$condition)[]
+
+enrich_branch <- getEnrichPopClust(hspc.combined = seurat, Xname = "Control", Yname = "Treated", colorX = colorTreatment[1],
+                                   colorY = colorTreatment[2], metaCol = "condition", clustCol = "BranchName")
+
+bp_treatmentBranch <- ggplot(data.frame(summary_branch_clust), aes(fill = condition,y = V1, x=BranchName)) +
+  geom_bar( stat="identity", position="fill")+
+  scale_fill_manual( values= rev(colorTreatment))+
+  scale_y_continuous(name = "Sample (%)", labels = c(0,25,50,75,100))+
+  ylab(label = "")+xlab(label = "") + coord_flip()+
+  theme(legend.title=element_blank(), axis.text.y = element_text(colour = enrich_branch[,"color"])) +
+  geom_hline(yintercept = propExpect["Control"])
+
+
+bp_branchState <- ggplot(data.frame(summary_branch_clust), aes(fill = BranchName,y = V1, x=FinalCluster)) +
+  geom_bar( stat="identity", position="fill")+
+  scale_fill_manual( values= colorBranches)+
+  scale_y_continuous(name = "Sample (%)", labels = c(0,25,50,75,100))+
+  ylab(label = "")+xlab(label = "") + coord_flip()+
+  theme(legend.title=element_blank())
+
+bp_stateBranch <- ggplot(data.frame(summary_branch_clust), aes(fill = FinalCluster,y = V1, x=BranchName)) +
+  geom_bar( stat="identity", position="fill")+
+  scale_fill_manual( values= colorCluster)+
+  scale_y_continuous(name = "Sample (%)", labels = c(0,25,50,75,100))+
+  ylab(label = "")+xlab(label = "") + coord_flip()+
+  theme(legend.title=element_blank())
+
+bp_cellCycleBranch <- ggplot(data.frame(summary_branch_clust), aes(fill = phases,y = V1, x=BranchName)) +
+  geom_bar( stat="identity", position="fill")+
+  scale_fill_manual( values= colorPhases)+
+  scale_y_continuous(name = "Sample (%)", labels = c(0,25,50,75,100))+
+  ylab(label = "")+xlab(label = "") + coord_flip()+
+  theme(legend.title=element_blank())
+
+ggsave(paste0(outdir, 'BP_treatmentPerBranch', '.png'), plot = bp_treatmentBranch, device = 'png', path = NULL, 
+       scale = 1, width = 10, height = 10, units = 'cm', dpi = 300)
+ggsave(paste0(outdir, 'BP_branchPerCluster', '.png'), plot = bp_branchState, device = 'png', path = NULL, 
+       scale = 1, width = 10, height = 10, units = 'cm', dpi = 300)
+ggsave(paste0(outdir, 'BP_clusterPerBranch', '.png'), plot = bp_stateBranch, device = 'png', path = NULL, 
+       scale = 1, width = 10, height = 10, units = 'cm', dpi = 300)
+ggsave(paste0(outdir, 'BP_cellCyclePerBranch', '.png'), plot = bp_cellCycleBranch, device = 'png', path = NULL, 
+       scale = 1, width = 10, height = 10, units = 'cm', dpi = 300)
+
+saveRDS(seurat, paste0(outdir, "seuratPseudotime.rds"))
\ No newline at end of file
diff --git a/R_src/testGetopt.R b/R_src/testGetopt.R
new file mode 100644
index 0000000000000000000000000000000000000000..0b399be9a810289547e623b2cb4f393722cb89d6
--- /dev/null
+++ b/R_src/testGetopt.R
@@ -0,0 +1,103 @@
+##------------------------------------------------------------------------------
+## L. Herault
+##------------------------------------------------------------------------------
+
+## -----------------------------------------------------------------------------
+## Libraries
+## -----------------------------------------------------------------------------
+
+suppressMessages(library(monocle))
+suppressMessages(library(Seurat))
+suppressMessages(library(BiocParallel))
+suppressMessages(library(getopt))
+suppressMessages(library(gridExtra))
+suppressMessages(library(gProfileR))
+suppressMessages(library(RColorBrewer))
+suppressMessages(library(readxl))
+suppressMessages(library(stringr))
+suppressMessages(library(scales))
+suppressMessages(library(cowplot))
+suppressMessages(library(sctransform))
+suppressMessages(library(stringr))
+suppressMessages(library(plyr))
+
+
+source("R_src/getSigPerCells.R")
+source("R_src/Enrichment.R")
+source("R_src/funForSeurat.R")
+
+
+# Seurat 3 integration workflow
+
+## -----------------------------------------------------------------------------
+## Command line args
+## -----------------------------------------------------------------------------
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'inputFiles',  'i', 1, "character", "REQUIRED: 10X dataset paths prepared as hspc.combined object (.RDS generated by prepare_data.R) separated by +",
+  'signaturesFile', 's',1, "character", "REQUIRED: signatures rds file",
+  'outdir',     'o',1, "character", 'Outdir path (default ./)',
+  "num_dim_CCA", 'q',1,"numeric", "First n dimensions of CCA to use for FindIntegrationAnchors Seurat function (15 by default)",
+  "num_dim_weight", 'w',1,"numeric", "First n dimensions to use for IntegrateData Seurat function (15 by default)",
+  "num_dim",      'n',1,"numeric", "First n dimensions of PCA to use for clustering, UMAP and TSNE (15 by default)",
+  "num_dim_integrated",'N',1,"numeric", "Number of PCA dimension computed to analyse integrated data (40 by default)",
+  "cores",        'c',1, "numeric", "Number of cores to use for ordering (for differencially expressed gene between clusters test)",
+  "resolution",    'r',1, "numeric", "resolution for hspc.combined clustering",
+  "correction",      "b",1,"character", "Covariable to use as blocking factor (eg one or several columns of pData: betch, cell cycle phases... separated by +)",
+  "logfc_threshold", "l", 1, "numeric", "logfc threshold for finding cluster markers (1 cluster vs all deg) 0.25 by default",
+  "rodriguezSig", "k", 1, "character", "path for xls file of Rodriguez result",
+  "norm_method", "z",1, "character", "normalization method, logNorm (by default) or sctransform",
+  "reusePca", "p", 0, "character","re use pca calculated before when caculating anchor weights for each dataset default to FALSE (permit to correct for cell cycle before integration)"
+), byrow=TRUE, ncol=5);
+
+opt = getopt(spec)
+
+# if help was asked, print a friendly message
+# and exit with a non-zero error code
+
+args <- commandArgs()
+if ( !is.null(opt$help) | is.null(opt$inputFiles)) {
+  cat("Perform Seurat 3 integration workflow, then cluster the cell with seurat 3 at different resolutions")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+print(opt$num_dim)
+print(opt$num_dim_integrated)
+print(opt$num_dim_CCA)
+print(opt$num_dim_weigth)
+#set default arguments values
+
+if (is.null(opt$resolution)) {
+  opt$resolution <- 0.6
+}
+
+if (is.null(opt$logfc_threshold)) {
+  opt$logfc_threshold <- 0.25
+}
+
+if (is.null(opt$num_dim_CCA)) {
+  opt$num_dim_CCA <- 15
+}
+
+if (is.null(opt$num_dim_weight)) {
+  opt$num_dim_weight <- 15
+}
+
+if (is.null(opt$num_dim)) {
+  opt$num_dim <- 15
+}
+
+if (is.null(opt$num_dim_integrated)) {
+  opt$num_dim_integrated <- 40
+}
+
+if (is.null(opt$norm_method)) {
+  opt$norm_method <- "logNorm"
+}
+
+if (is.null(opt$reusePca)) {
+  opt$reusePca <- FALSE
+}
+
diff --git a/R_src/transferFuns.R b/R_src/transferFuns.R
new file mode 100644
index 0000000000000000000000000000000000000000..19dd974977814810236a41c930bcb9c59c49bc2a
--- /dev/null
+++ b/R_src/transferFuns.R
@@ -0,0 +1,76 @@
+
+integrateByConditionOneAtacRnaAll <- function(atac,
+                                              rna0,
+                                              clustCol ="numclust",
+                                              refAssayLabel = "RNA",
+                                              refAssayTransfer = "RNA",
+                                              condCol,
+                                              condition,
+                                              dimsLSI = c(1:30),
+                                              dimsCCA = c(1:30),
+                                              outdir = "./") {
+
+  
+  Idents(rna0) <- condCol
+  rna <- subset(rna0,idents = condition)
+  
+  ##Data preprocessing
+  
+  ## Here, we process the gene activity matrix in order to find anchors between cells in the scATAC-seq dataset and the scRNA-seq dataset.
+  rna$numclust <- rna@meta.data[,clustCol]
+  DefaultAssay(atac) <- "activities"
+  atac <- FindVariableFeatures(atac)
+  atac <- NormalizeData(atac)
+  atac <- ScaleData(atac)
+  
+  transfer.anchors <- FindTransferAnchors(reference = rna, query = atac, features = VariableFeatures(object = rna), 
+                                          reference.assay = refAssayLabel, query.assay = "activities", reduction = "cca")
+  
+  ## To improve discard first integratedLSI ?
+  celltype.predictions <- TransferData(anchorset = transfer.anchors, refdata = rna$numclust, dims = dimsLSI,
+                                       weight.reduction = atac[["lsi"]])
+  atac <- AddMetaData(atac, metadata = celltype.predictions)
+  
+  png(paste0(outdir,"prediction.score.max.png"))
+  hist(atac$prediction.score.max)
+  abline(v = 0.5, col = "red")
+  dev.off()
+  
+  ## We filter atac cells with a weak prediction score for rna cluster transfer
+  atac.filtered <- subset(atac, subset = prediction.score.max > 0.4)
+  atac.filtered$predicted.id <- factor(atac.filtered$predicted.id, levels = levels(rna$numclust))  # to make the colors match
+  
+  p1 <- DimPlot(atac.filtered, group.by = "predicted.id", label = TRUE, repel = TRUE) + ggtitle("scATAC-seq cells") + 
+    NoLegend() + scale_colour_hue(drop = FALSE)
+  p2 <- DimPlot(rna, group.by = "numclust", label = TRUE, repel = TRUE) + ggtitle("scRNA-seq cells") + 
+    NoLegend()
+  
+  png(paste0(outdir,"predictedATACvsRNAclust.png"),units = "in",res = 300,height=6,width =10)
+  grid.arrange(p1,p2,nrow = 1)
+  dev.off()
+  
+  ### Input with all rna data as ref
+  # note that we restrict the imputation to variable genes from scRNA-seq 
+  genes.use <- VariableFeatures(rna0) # all genes of the slot if integrated 
+  refdata <- GetAssayData(rna0, assay = refAssayTransfer, slot = "data")[genes.use, ]
+  
+  # refdata (input) contains a scRNA-seq expression matrix for the scRNA-seq cells.  imputation
+  # (output) will contain an imputed scRNA-seq matrix for each of the ATAC cells
+  transfer.anchors.2 <- FindTransferAnchors(reference = rna0, 
+                                            query = atac.filtered, 
+                                            features = VariableFeatures(object = rna0), 
+                                            reference.assay = refAssayTransfer, 
+                                            query.assay = "activities", 
+                                            reduction = "cca",
+                                            dims = dimsCCA)
+  
+  imputation <- TransferData(anchorset = transfer.anchors.2, 
+                             refdata = refdata, 
+                             weight.reduction = atac.filtered[["lsi"]],
+                             dims = dimsLSI)
+  
+  # this line adds the imputed data matrix to the atac object
+  atac.filtered[[refAssayTransfer]] <- imputation
+  
+  return(atac.filtered)
+}
diff --git a/R_src/transferRnaAtacCL.R b/R_src/transferRnaAtacCL.R
new file mode 100644
index 0000000000000000000000000000000000000000..48a8f602e44608f33decaa72b30375a28c455162
--- /dev/null
+++ b/R_src/transferRnaAtacCL.R
@@ -0,0 +1,170 @@
+
+suppressMessages(library(Signac))
+suppressMessages(library(Seurat))
+suppressMessages(library(getopt))
+suppressMessages(library(gridExtra))
+suppressMessages(library(scales))
+suppressMessages(library(grid))
+suppressMessages(library(S4Vectors))
+suppressMessages(library(patchwork))
+suppressMessages(library(ggplot2))
+
+set.seed(2021)
+
+source("R_src/transferFuns.R")
+
+
+spec = matrix(c(
+  'help',        'h', 0, "logical",   "Help about the program",
+  'inputRNAintegrated',  'i', 1, "character", "REQUIRED: scRNA integrated dataset path prepared as seurat object (.RDS generated by seurat4_integration.R)",
+  'inputAtac1', 'a',1, "character", "REQUIRED: scATAC dataset path prepared as seurat object (.RDS generated by SignacSmpCL.R)",
+  'inputAtac2',     'b',1, "character", "REQUIRED: scATAC dataset path prepared as seurat object (.RDS generated by SignacSmpCL.R)",
+  "num_dim_CCA", 'q',1,"numeric", "First n dimensions of CCA to use for FindIntegrationAnchors and TransferData Seurat functions (30 by default)",
+  "num_dim_LSI", 'l',1,"numeric", "First n dimensions of LSI to use for the atac query in TransferData Seurat functions (30 by default)",
+  "discardFirstLsiDim", 'd',0,"logical", "discard first dim lsi for the query in TransferData function",
+  "num_dim",      'n',1,"numeric", "First n dimensions of PCA to use for clustering, UMAP and TSNE (15 by default)",
+  "num_dim_integrated",'N',1,"numeric", "Number of PCA dimension computed to analyse integrated data (40 by default)",
+  "clustCol",    'r',1, "character", "cluster column name to transfer to atac data",
+  "condClust",    'c',1, "character", "condition column name present in all inputs",
+  "outdir",      "o",1,"character", "outdir path"),
+
+byrow=TRUE, ncol=5);
+
+opt = getopt(spec)
+
+# if help was asked, print a friendly message
+# and exit with a non-zero error code
+
+args <- commandArgs()
+if ( !is.null(opt$help)) {
+  cat("Perform Seurat 3 integration workflow, then cluster the cell with seurat 3 at different resolutions")
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+#set default arguments values
+
+if (is.null(opt$clustCol)) {
+  opt$clustCol <- "numclust"
+}
+
+if (is.null(opt$condCol)) {
+  opt$condCol <- "condition"
+}
+
+
+if (is.null(opt$num_dim_CCA)) {
+  opt$num_dim_CCA <- 30
+}
+
+dimsCCA <- c(1:opt$num_dim_CCA)
+
+if (is.null(opt$num_dim_LSI)) {
+  opt$num_dim_LSI <- 30
+}
+
+if (is.null(opt$discardFirstLsiDim)) {
+  dimsLSI <- c(1:opt$num_dim_LSI)
+} else {
+  dimsLSI <- c(2:opt$num_dim_LSI)
+}
+
+if (is.null(opt$num_dim_integrated)) {
+  opt$num_dim_integrated <- 40
+}
+
+if (is.null(opt$num_dim)) {
+  opt$num_dim <- 15
+}
+
+
+print(opt)
+
+rna0 <- readRDS(opt$inputRNAintegrated)
+atac1 <- readRDS(opt$inputAtac1)
+atac2 <- readRDS(opt$inputAtac2)
+
+print("add metadata...")
+
+
+rna0$orig.type <- "RNA"
+atac1$orig.type <- "ATAC"
+atac2$orig.type <- "ATAC"
+
+print("set idents...")
+
+Idents(atac1) <- opt$condCol
+Idents(atac2) <- opt$condCol
+
+Idents(rna0) <- opt$condCol
+
+print("set numclust...")
+
+rna0$numclust <- rna0@meta.data[,opt$clustCol]
+print("retrieve conditions...")
+cond <- unique(rna0@meta.data[,opt$condCol])
+print(cond)
+print("cluster transfer to first ATAC smp...")
+
+
+atac1 <- integrateByConditionOneAtacRnaAll(atac = atac1,
+                                           rna = rna0,
+                                           clustCol = opt$clustCol,
+                                           refAssayLabel = "RNA",
+                                           refAssayTransfer = "integrated",
+                                           condCol = opt$condCol,
+                                           condition = cond[1],
+                                           dimsLSI = dimsLSI,
+                                           dimsCCA = dimsCCA,
+                                           outdir = opt$outdir) 
+
+
+atac2 <- integrateByConditionOneAtacRnaAll(atac = atac2,
+                                           rna = rna0,
+                                           clustCol = opt$clustCol,
+                                           refAssayLabel = "RNA",
+                                           refAssayTransfer = "integrated",
+                                           condCol = opt$condCol,
+                                           condition = cond[2],
+                                           dimsLSI = dimsLSI,
+                                           dimsCCA = dimsCCA,
+                                           outdir = opt$outdir)
+
+
+## merge all the objects
+atacAll <- merge(x = atac1, y = atac2)
+coembed <- merge(x= rna0,y=atacAll)
+
+## Reduce dim on integrated (imputed) slot
+genes.use <- VariableFeatures(rna0) # Will be all genes if integrated
+
+coembed <- ScaleData(coembed, features = genes.use, do.scale = FALSE)
+coembed <- RunPCA(coembed, features = genes.use, verbose = FALSE)
+
+png(paste0(opt$outdir,"elbowAll.png"))
+ElbowPlot(object = coembed,ndims = opt$num_dim_integrated)
+dev.off()
+
+coembed <- RunUMAP(coembed, dims = 1:opt$num_dim)
+coembed$numclust <- ifelse(coembed$orig.type == "RNA", coembed$numclust, coembed$predicted.id)
+
+coembed$condType <- paste(coembed$condition,coembed$orig.type)  
+
+p1 <- DimPlot(coembed, group.by = "numclust")
+p2 <- DimPlot(coembed, split.by = "condType", label = TRUE, repel = TRUE,ncol = 2) 
+
+png(paste0(opt$outdir,"/elbowAll.png"),height = 800,width=1600)
+grid.arrange(p1,p2,ncol = 2)
+dev.off()
+
+saveRDS(atacAll, paste0(opt$outdir, "/atac_integrated.rds"))
+saveRDS(coembed,paste0(opt$outdir,"/coembedAll.rds"))
+
+
+
+
+
+
+
+
+