enhgrid

import "gitlab.com/Grouumf/enhlinktools/enhgrid"

Library that compiles the enhgrid executable

enhgrid performs enhlink on multiple processes for a range of hyperparameter values. enhgrid generates output files for each hyperparameter combination. The following parameters can accept multiple values:

-downsample <int>
-n_boot <int>
-depth <int>
-max_features <int>
-secondOrderMaxFeat <int>
-threshold <float>
-min_matsize <int>
-min_leafsize <int>
-merging_cutoff <int>
-neighborhood <int>
-maxFeatType <string/int/float>
-lambda1 <float>
-lambda2 <float>
-threads <int>

Multiple values can be passed as input, separated by either commas or spaces: for example -depth 2,3,4 or -depth "2 3 4"

Enhgrid accepts the exact same parameters as Enhlink, with additional functionalities:

## Parameters unique to enhgrid:

-randomNTargets <int>     For each grid iteration, pick N targets at random from the index and process them instead of the full list of targets
-repetition <int>     Number of repetitions to be performed for each iteration (default: 1)
-processes <int>  Number of Enhlink processes to be launched in parallel (default: 1)
-splitTargetList Split the list of targets across the n processes

<<<<<<<<<<<<<<<<<<<< WARNING >>>>>>>>>>>>>>>>>>>> As of March 20 2024 (Enhlink v0.21.0), we changed some of Enhgrid’s parameter names for clarity and consistency.

Below is the list of changes, (version < 0.21.0) -> (version >= 0.21.0):

cluster -> clusters
promoter -> gtf
genes -> targets
gene -> target
isGeneExpr -> isExpr
rmPeaksInPromoter -> rmPeaksInTargets
splitGeneList -> splitTargetList
randomNGenes -> randomNTargets
onlyPositiveLink -> linkType

<<<<<<<<<<<<<<<<<<<<<<<<>>>>>>>>>>>>>>>>>>>>>>>>>

USAGE:

enhgrid -mat <file> -xgi <file> -ygi <file> -gtf <file> -out <path> -tag <string>
        -mat2 <file> -xgi2 <file> -ygi2 <file>   # IF PASSING A GENE MATRIX FILE
        -target <string>  # IF FOCUSING ON ONE TARGET
        -targets <file>  # IF FOCUSING ON A LIST OF TARGETS
        -isExpr # IF THE GENE MATRIX IS AN EXPRESSION MATRIX
        -covariates <file> -xgi_subset <file>  -ygi_subset <file> -clusters <file>  # OPTIONAL
        -downsample <int> -threads <int> -n_boot <int> -depth <int> -max_features <int>  # OPTIONAL
        -threshold <float> -min_matsize <int> -min_leafsize <int> -merging_cutoff <int>   # OPTIONAL
        -format {coo, mtx, cellRanger} -keep_sparse -maxFeatType <string/int/float>  # OPTIONAL
        -rmPeaksInTargets -linkType {"all", "positive", "negative"} -secondOrder -ignoreEnhancerWeight  # OPTIONAL
        -neighborhood <int> -secondOrderMaxFeat <int> -uniformSampling # OPTIONAL
        -randomNTargets <int> -repetition <int> -processes <int> -splitTargetList # OPTIONAL and specific to enhgrid

Please check `enhgrid -h` and the tutorial and introduction sections for a more precise description of the input parameters.

Index

Variables
func analyseOneGeneList(enhObj enhlinkobject.EnhlinkObject, processID int, bucket map[string]bool, waiting *sync.WaitGroup, guard chan bool)
func getGeneBucketsFromGene(geneFile utils.Filename, processes int) ([]map[string]bool, int)
func getGeneBucketsFromPromoter(plist *enhlinkobject.PromoterList, processes int) ([]map[string]bool, int)
func launchOneIterThread(isOver bool, count int, attributes enhlinkobject.TreeAttributes, enhMat, geneMat, covMat *matrix.SparseBoolMatrix, floatMat *matrix.SparseFloatMatrix, plist *enhlinkobject.PromoterList, writer *io.WriteCloser, waiting *sync.WaitGroup, mutex *sync.Mutex, guard chan bool)
func main()
func mergeBucketResultsFile(outTag string, clusterList []string, nbBuckets int)
func mergeOneSetOfBucketFiles(outTag, cluster, ext string, nbBuckets int)
func processNGeneLists(attributes enhlinkobject.TreeAttributes, enhMat, geneMat, covMat *matrix.SparseBoolMatrix, floatMat *matrix.SparseFloatMatrix, plist *enhlinkobject.PromoterList, outTag string)
func reduce(combinations [][][]int) (res [][]int)
func splitGenesToBucket(geneMap map[string]uint, processes int) (geneBuckets []map[string]bool, nbGenes int)
func stringToFloatArray(stringArr, option string) (outArr []float64)
func stringToIntArray(stringArr, option string) (outArr []int)
func stringToMaxFeatTypeArray(stringArr, option string) (outArr []enhlinkobject.MaxFeaturesType)
func testIfRequiredFilesExist()
type paramArrays

Variables

CLUSTERFILE cluster file

var CLUSTERFILE utils.Filename

DOWNSAMPLEARR Downsample the number of samples to use

var DOWNSAMPLEARR string

GENE gene

var GENE string

IGNOREENHANCERWEIGHT Ignore Enhancers weight (the ratio of accessibility) in the computation of the modified Information Gain

var IGNOREENHANCERWEIGHT bool

INPUTFORMAT input matrix format

var INPUTFORMAT string

INPUTGENEMAT input matrix name for the gene matrix (input)

var INPUTGENEMAT utils.Filename

INPUTMAT input matrix name (input)

var INPUTMAT utils.Filename

ISGENEEXPR using gene expression for the gene mat

var ISGENEEXPR bool

KEEPSPARSE Keep the main ColMat matrix sparse. Useful for memory reasons if the background is very large

var KEEPSPARSE bool

LAMBDA1ARR Lambda parameter of a poisson distribution, that controls the amount of dropouts of the simulated variables

var LAMBDA1ARR string

LAMBDA2ARR Lambda parameter of a poisson distribution, that controls the amount of false positives in the simulated variables

var LAMBDA2ARR string

LINKTYPE Which link to keep {“all”, “positive”, “negative”}

var LINKTYPE string

MAXFEATURESARR Maximum number of explanatory features per bootstrap model.

var MAXFEATURESARR string

MAXFEATURESTYPEARR max feature type

var MAXFEATURESTYPEARR string

MERGINGCUTOFF merging cutoff for closeby promoters

var MERGINGCUTOFF int

METADATA optional covariate matrix

var METADATA utils.Filename

MINLEAFSIZEARR Min size of leaf

var MINLEAFSIZEARR string

MINMATSIZEARR Min matrix size (int)

var MINMATSIZEARR string

NBBOOTARR Number of bootstraps

var NBBOOTARR string

NBPROCESSES Number of Enhlink processes to be launched in parallel

var NBPROCESSES int

NBSIMFEATURESARR Number of simulated features to use

var NBSIMFEATURESARR string

NBTHREADSARR number of internal threads for each enhlink computation

var NBTHREADSARR string

NEIGHBORHOODARR neighborhood values to test (the -neighborhood option)

var NEIGHBORHOODARR string

ONLYSIM only perform simulation

var ONLYSIM bool

OUTDIR output directory

var OUTDIR string

OUTTAG output files tag

var OUTTAG string

PROMOTERFILE promoter file

var PROMOTERFILE utils.Filename

RANDOMNBGENES random subset of genes to analyze

var RANDOMNBGENES int

REPETITION Number of repetition to be performed for each iteration (default: 1)

var REPETITION int

RMPEAKSINPROMOTERS Remove peaks within promoter boundaries

var RMPEAKSINPROMOTERS bool

SECONDORDER compute second order links - covar correlation

var SECONDORDER bool

SECONDORDERMAXFEATURESARR Maximum number of explanatory features per bootstrap model for the second order model.

var SECONDORDERMAXFEATURESARR string

SHOWVERSION show version and quit

var SHOWVERSION bool

SPLITGENELIST Split the gene list through the processes

var SPLITGENELIST bool

THRESHOLDARR Significance level

var THRESHOLDARR string

TREEDEPTHARR Max tree level

var TREEDEPTHARR string

UNIFORMSAMPLING Randomly sample the cells to have a uniform covariate distribution for each bootstrap. Needs a covariate matrix

var UNIFORMSAMPLING bool

XGI row index for input mat

var XGI utils.Filename

XGIGENE row index for input gene mat

var XGIGENE utils.Filename

XGISUBSET row index subset for input mat

var XGISUBSET utils.Filename

YGI column index for input mat

var YGI utils.Filename

YGIGENE column index for input gene mat

var YGIGENE utils.Filename

YGIGENESUBSET column index subset for input gene mat

var YGIGENESUBSET utils.Filename

YGISUBSET column index subset for input mat

var YGISUBSET utils.Filename

func analyseOneGeneList 

func analyseOneGeneList(enhObj enhlinkobject.EnhlinkObject, processID int, bucket map[string]bool, waiting *sync.WaitGroup, guard chan bool)

func getGeneBucketsFromGene 

func getGeneBucketsFromGene(geneFile utils.Filename, processes int) ([]map[string]bool, int)

func getGeneBucketsFromPromoter 

func getGeneBucketsFromPromoter(plist *enhlinkobject.PromoterList, processes int) ([]map[string]bool, int)

func launchOneIterThread 

func launchOneIterThread(isOver bool, count int, attributes enhlinkobject.TreeAttributes, enhMat, geneMat, covMat *matrix.SparseBoolMatrix, floatMat *matrix.SparseFloatMatrix, plist *enhlinkobject.PromoterList, writer *io.WriteCloser, waiting *sync.WaitGroup, mutex *sync.Mutex, guard chan bool)

func main 

func main()

func mergeBucketResultsFile 

func mergeBucketResultsFile(outTag string, clusterList []string, nbBuckets int)

func mergeOneSetOfBucketFiles 

func mergeOneSetOfBucketFiles(outTag, cluster, ext string, nbBuckets int)

func processNGeneLists 

func processNGeneLists(attributes enhlinkobject.TreeAttributes, enhMat, geneMat, covMat *matrix.SparseBoolMatrix, floatMat *matrix.SparseFloatMatrix, plist *enhlinkobject.PromoterList, outTag string)

func reduce 

func reduce(combinations [][][]int) (res [][]int)

func splitGenesToBucket 

func splitGenesToBucket(geneMap map[string]uint, processes int) (geneBuckets []map[string]bool, nbGenes int)

func stringToFloatArray 

func stringToFloatArray(stringArr, option string) (outArr []float64)

func stringToIntArray 

func stringToIntArray(stringArr, option string) (outArr []int)

func stringToMaxFeatTypeArray 

func stringToMaxFeatTypeArray(stringArr, option string) (outArr []enhlinkobject.MaxFeaturesType)

func testIfRequiredFilesExist 

func testIfRequiredFilesExist()

type paramArrays 

// paramArrays groups one slice of values per hyperparameter, plus bookkeeping
// fields used while stepping through the parameter combinations.
// NOTE(review): the role of each field is inferred from its name, its type and
// the method set (init, initIterators, generateAllCombination, iter) — confirm
// against the implementation.
type paramArrays struct {
    // Integer-valued hyperparameters.
    downsample             []int
    nbBoot                 []int
    depth                  []int
    maxFeatures            []int
    secondOrderMaxFeatures []int
    minMatsize             []int
    minLeafsize            []int
    nbThreads              []int
    neighborhood           []int
    nbSimFeatures          []int

    // Max-features-type values.
    maxFeatType []enhlinkobject.MaxFeaturesType

    // Float-valued hyperparameters.
    threshold        []float64
    lambda1, lambda2 []float64

    // Iteration state. Presumably iterators maps a parameter key to a
    // position, keys lists the parameter keys and paramCombinations holds the
    // index combinations to visit — TODO confirm.
    iterators            map[string]int
    nbSteps, currentStep int
    keys                 []string
    paramCombinations    [][]int
}

func (*paramArrays) generateAllCombination 

func (pa *paramArrays) generateAllCombination()

func (*paramArrays) init 

func (pa *paramArrays) init()

func (*paramArrays) initIterators 

func (pa *paramArrays) initIterators()

func (*paramArrays) iter 

func (pa *paramArrays) iter(attributes enhlinkobject.TreeAttributes, tStart time.Time) (newAttr enhlinkobject.TreeAttributes, isOver bool)

func (*paramArrays) returnLastThreadVal 

func (pa *paramArrays) returnLastThreadVal() int

enhlink

import "gitlab.com/Grouumf/enhlinktools/enhlink"

Library that compiles the enhlink executable

enhlink infers enhancer / promoter co-accessibilities (links) using random forests of ID3 trees and Information gain.

enhlink main inputs are:

a) a (cell x peak) sparse matrix,
b) a 4-columns promoter TSV file <chrID, start, stop, geneID> ,
c) an optional (cell x gene) sparse matrix if the gene activity cannot be inferred from the peaks of the first matrix and the promoter regions. This matrix can either be interpreted as boolean (e.g. the promoter of a given gene is either accessible or not for a given cell), or as a float matrix using the -isExpr option, which reflects the gene expression (for example in the context of a scATAC-seq/RNA-seq multi-omic study)

In addition, covariates (cell x covariates) and clusters (cell x clusterID) TSV file can be provided. Finally, multiple optional parameters can be set to fine tune the speed, accuracies, and range of the models.

<<<<<<<<<<<<<<<<<<<< WARNING >>>>>>>>>>>>>>>>>>>> As of March 20 2024 (Enhlink v0.21.0), we changed some of Enhlink’s parameter names for clarity and consistency.

Below is the list of changes, (version < 0.21.0) -> (version >= 0.21.0):

cluster -> clusters
promoter -> gtf
genes -> targets
gene -> target
isGeneExpr -> isExpr
rmPeaksInPromoter -> rmPeaksInTargets
onlyPositiveLink -> linkType

<<<<<<<<<<<<<<<<<<<<<<<<>>>>>>>>>>>>>>>>>>>>>>>>>

USAGE:

enhlink -mat <file> -xgi <file> -ygi <file> -gtf <file> -out <path> -tag <string>
        -mat2 <file> -xgi2 <file> -ygi2 <file>   # IF PASSING A GENE MATRIX FILE
        -target <string>  # IF FOCUSING ON ONE TARGET
        -targets <file>  # IF FOCUSING ON A LIST OF TARGETS
        -isExpr # IF MATRIX 2 IS AN EXPRESSION MATRIX
        -covariates <file> -xgi_subset <file>  -ygi_subset <file> -clusters <file>  # OPTIONAL
        -downsample <int> -threads <int> -n_boot <int> -depth <int> -max_features <int>  # OPTIONAL
        -threshold <float> -min_matsize <int> -min_leafsize <int> -merging_cutoff <int>   # OPTIONAL
        -format {coo, mtx, cellRanger} -keep_sparse -maxFeatType <string/int/float>  # OPTIONAL
        -rmPeaksInTargets -linkType {"all", "positive", "negative"} -secondOrder -ignoreEnhancerWeight  # OPTIONAL
        -neighborhood <int> -secondOrderMaxFeat <int> -uniformSampling # OPTIONAL

Please check `enhlink -h` and the tutorial and introduction sections for a more precise description of the input parameters.

Variables

CLUSTERFILE cluster file

var CLUSTERFILE utils.Filename

DOWNSAMPLE Downsample the number of samples to use

var DOWNSAMPLE int

GENE gene

var GENE string

IGNOREENHANCERWEIGHT Ignore Enhancers weight (the ratio of accessibility) in the computation of the modified Information Gain

var IGNOREENHANCERWEIGHT bool

INPUTFORMAT input matrix format

var INPUTFORMAT string

INPUTGENEMAT input matrix name for the gene matrix (input)

var INPUTGENEMAT utils.Filename

INPUTMAT input matrix name (input)

var INPUTMAT utils.Filename

ISGENEEXPR using gene expression for the gene mat

var ISGENEEXPR bool

KEEPSPARSE Keep the main ColMat matrix sparse. Useful for memory reasons if the background is very large

var KEEPSPARSE bool

LAMBDA1 Lambda parameter of a poisson distribution, that controls the amount of dropouts of the simulated variables

var LAMBDA1 float64

LAMBDA2 Lambda parameter of a poisson distribution, that controls the amount of false positives in the simulated variables

var LAMBDA2 float64

LINKTYPE Which link to keep {“all”, “positive”, “negative”}

var LINKTYPE string

MAXFEATURES Maximum number of explanatory features per bootstrap model.

var MAXFEATURES int

MAXFEATURESTYPE Maximum of features to be considered for a given tree. {"all", "sqrt", "log"}

var MAXFEATURESTYPE enhlinkobject.MaxFeaturesType

MERGINGCUTOFF merging cutoff for closeby promoters

var MERGINGCUTOFF int

METADATA optional covariate matrix

var METADATA utils.Filename

MINLEAFSIZE Min size of leaf

var MINLEAFSIZE int

MINMATSIZE Min matrix size (int)

var MINMATSIZE int

NBBOOT Number of bootstraps

var NBBOOT int

NBSIMFEATURES Number of simulated features to use

var NBSIMFEATURES int

NBTHREADS number of internal threads

var NBTHREADS int

NEIGHBORHOOD value of the -neighborhood option

var NEIGHBORHOOD int

ONLYSIM only perform simulation

var ONLYSIM bool

OUTDIR output directory

var OUTDIR string

OUTTAG output files tag

var OUTTAG string

PROMOTERFILE promoter file

var PROMOTERFILE utils.Filename

RMPEAKSINPROMOTERS Remove peaks within promoter boundaries

var RMPEAKSINPROMOTERS bool

SECONDORDER compute second order links - covar correlation

var SECONDORDER bool

SECONDORDERMAXFEATURES Maximum number of explanatory features per bootstrap model for second order models

var SECONDORDERMAXFEATURES int

SHOWVERSION show version and quit

var SHOWVERSION bool

THRESHOLD Significance level

var THRESHOLD float64

TREEDEPTH Max tree level

var TREEDEPTH int

UNIFORMSAMPLING Randomly sample the cells to have a uniform covariate distribution for each bootstrap. Needs a covariate matrix

var UNIFORMSAMPLING bool

XGI row index for input mat

var XGI utils.Filename

XGIGENE row index for input gene mat

var XGIGENE utils.Filename

XGISUBSET row index subset for input mat

var XGISUBSET utils.Filename

YGI column index for input mat

var YGI utils.Filename

YGIGENE column index for input gene mat

var YGIGENE utils.Filename

YGIGENESUBSET column index subset for input gene mat

var YGIGENESUBSET utils.Filename

YGISUBSET column index subset for input mat

var YGISUBSET utils.Filename

var maxfeaturestypeStr string

func main 

func main()

func testIfRequiredFilesExist 

func testIfRequiredFilesExist()

enhlinkobject

import "gitlab.com/Grouumf/enhlinktools/enhlinkobject"

package enhlinkobject is a library to create an Enhlink Object and perform Enhlink analysis

Index

Variables
func AssertIfFileExists(filename, tag string)
func MergeClosePromoterRegions(mergingCutoff int, plist *PromoterList)
func pickNGenesAtRandom(nbGenes int, geneSet map[string]uint) (newGeneSet map[string]bool)
type EnhlinkObject
type LinkType
- func (t LinkType) IsValid() LinkType
type MaxFeaturesType
type PromoterList
- func LoadPromotersFile(fname utils.Filename) (plist PromoterList)
- func (pl *PromoterList) Len() int
type TreeAttributes
type pvalPoint

Variables

VERSION version of the current software

var VERSION = "0.21.3"

linkTypes possible options for the link type

var linkTypes = [...]LinkType{allLink, posLink, negLink}

func AssertIfFileExists 

func AssertIfFileExists(filename, tag string)

AssertIfFileExists panic if err is nil from os.Stats

func MergeClosePromoterRegions 

func MergeClosePromoterRegions(mergingCutoff int, plist *PromoterList)

MergeClosePromoterRegions merge close promoters according to cutoff

func pickNGenesAtRandom 

func pickNGenesAtRandom(nbGenes int, geneSet map[string]uint) (newGeneSet map[string]bool)

type EnhlinkObject 

EnhlinkObject is the main enhlink object, containing the input matrices, the tree attributes, the internal state and the output writers

type EnhlinkObject struct {

    //////////////// files and matrices ////////////
    // promoter file
    promoterFile utils.Filename
    // sparse matrix
    SparseMatrix matrix.SparseBoolMatrix
    // sparse matrix for gene activity
    SparseMatrixGene *matrix.SparseBoolMatrix
    // sparse float matrix for gene expression (substitutes SparseMatrixGene)
    SparseMatrixFloat *matrix.SparseFloatMatrix
    // sparse matrix for covariates
    SparseMatrixCovar *matrix.SparseBoolMatrix

    //////////////// Internal variables //////////
    // current gene under study
    currentGene string
    // internal promoter map that defines all the current promoter regions
    // If matrix is constructed from peakMat, it is only 1 region
    currentPeaks map[utils.Peak]bool
    // peaks banned from being in the neighborhood matrix
    // because they are in a current promoter region
    blacklistedPeaks map[uint]bool

    // features on which to perform the analysis
    relevantFeatures []int
    // endog response binary vector
    ygiVector    []int //map[xgiID]value
    ygiCovVector []int //map[xgiID]value
    // endog response float vector
    ygiVectorFloat    []float64 //map[xgiID]value
    ygiCovVectorFloat []float64 //map[xgiID]value
    // Sum of ygi for all clusters
    ygiClusterSum map[string]float64
    // Remove peaks within promoter boundaries
    rmPeaksInPromoters bool

    // surrounding matrix
    surroundingPeaks []uint
    // Number of additional random features
    nbRandFeat int
    // Number of features used for the model
    nbFeatUsed int
    // is gene matrix provided
    isGeneMat bool
    // is gene expression matrix provided
    isFloatMat bool
    // is cov matrix provided
    isCovMat bool
    // starting time
    tStart time.Time

    bucketCovariates map[string][][]uint

    // valid peak and covariates before
    validYgi, validCovar map[string][]uint

    xgiCovMap []map[int]bool

    // Internal variable to indicate whether the 2nd order inference mode is activated
    isInferring2nd bool

    // verbose status
    verbose bool

    //////////////// Simulated variables ////////
    simColMat         matrix.MatColHash
    simYgiVector      []int
    simYgiVectorFloat []float64
    nbSimFeat         int
    isSim             bool
    lambda1           float64 // poisson param for dropout level
    lambda2           float64 // poisson param for false positive level

    //////// Float matrix attributes ///////////
    nonNullMean float64

    //////////////// TREE attributes //////////
    // TreeAttributes object passed during the init
    attributes TreeAttributes
    // Number of internal threads to perform the multiple tasks
    nbThreads int
    // region in number of base pairs to define the surrounding enhancers
    surroundingSize int
    // Min matrix size
    minMatSize int
    // Max depth
    maxDepth int
    // Number of classes for ygi vector
    nbClass int
    // min leaf size of the tree
    minLeafSize int
    // number of bootstraps
    nbBoot int
    // P-value threshold
    threshold float64
    // downsample the number of samples
    downsample int
    // Maximum number of explanatory features per bootstrap model.
    maxNbFeatures int
    // Maximum number of explanatory features per bootstrap model for second order models.
    secondOrderMaxFeat int
    // Ignore Enhancers weight (the ratio of accessibility) in the computation of the modified Information Gain
    ignoreEnhancerWeight bool
    // Keep the main ColMat matrix sparse. Useful for memory reasons if the background is very large
    keepSparse bool
    // Identify the covariates associated with each inferred enhancer-promoter links
    secondOrder bool
    // Maximum number of features to be considered for a given tree: {"all", "sqrt", "log"}, or an int / float value
    maxFeatType MaxFeaturesType
    // Only perform simulation
    onlySim bool
    // which links to keep: {"all", "positive", "negative"}
    LinkType LinkType
    // uniform covariate sampling for each tree
    uniformSampling bool

    ////////// Sync objects /////////////////
    guard         chan bool
    mutex, mutex2 sync.Mutex
    waiting       sync.WaitGroup

    // promoter list map[gene]list<peak>
    Promoters *PromoterList
    // Reduced Intervals for ygi index map[chrID]interval
    YgiIntervalReduced utils.PeakIntervalTreeObject
    // Intervals for ygi index map[chrID]interval
    YgiInterval utils.PeakIntervalTreeObject
    // refined index of ygis not in promoters
    ygisNotInPromoters map[string]uint

    //////////////// Files objects //////////
    outDir, outTag string
    // map[cluster] -> file
    writers, writersCov, writers2ndOrder map[string]*io.WriteCloser
    // map[cluster] file name
    files, filesCov, files2ndOrder map[string]string
    // writer of simulated features results
    writerSim *io.WriteCloser
    fileSim   string
}

func (*EnhlinkObject) AnalyseAllGenesFromGeneMat 

func (eo *EnhlinkObject) AnalyseAllGenesFromGeneMat()

AnalyseAllGenesFromGeneMat analyse all genes from GeneMat

func (*EnhlinkObject) AnalyseAllPromoters 

func (eo *EnhlinkObject) AnalyseAllPromoters(geneSubset utils.Filename)

AnalyseAllPromoters analyse all genes from GeneMat

func (*EnhlinkObject) AnalyseNGenes 

func (eo *EnhlinkObject) AnalyseNGenes(geneMap map[string]bool, verbose bool)

AnalyseNGenes analysis one gene and close output files

func (*EnhlinkObject) AnalyseOneGene 

func (eo *EnhlinkObject) AnalyseOneGene(gene string)

AnalyseOneGene analyses one gene and closes the output files

func (*EnhlinkObject) AnalyseRandomSubsetFromGeneMat 

func (eo *EnhlinkObject) AnalyseRandomSubsetFromGeneMat(nSamples int)

AnalyseRandomSubsetFromGeneMat pick n genes at random from gene mat and analyse them

func (*EnhlinkObject) AnalyseRandomSubsetOfPromoters 

func (eo *EnhlinkObject) AnalyseRandomSubsetOfPromoters(geneSubsetFile utils.Filename, nSamples int)

AnalyseRandomSubsetOfPromoters analyse all genes from GeneMat

func (*EnhlinkObject) Init 

func (eo *EnhlinkObject) Init(mat matrix.SparseBoolMatrix, geneMat, covMat *matrix.SparseBoolMatrix, floatMat *matrix.SparseFloatMatrix, plist *PromoterList, attributes TreeAttributes)

Init init enhlinkObject with a sparse matrix and a promoter list

func (*EnhlinkObject) analyseOneGene 

func (eo *EnhlinkObject) analyseOneGene(gene string)

analyseOneGene analyses one gene and closes the output files

func (*EnhlinkObject) blacklistAllPeaksInPromoter 

func (eo *EnhlinkObject) blacklistAllPeaksInPromoter(targetPeaks []utils.Peak)

blacklistAllPeaksInPromoter init blacklistedPeaks with all peaks within any current prom region

func (*EnhlinkObject) computeOnePvalue 

func (eo *EnhlinkObject) computeOnePvalue(arr []float64, ygi uint, pvals *[]pvalPoint)

func (*EnhlinkObject) computePvalues 

func (eo *EnhlinkObject) computePvalues(scoreArr map[uint][]float64) (pvals []pvalPoint)

func (*EnhlinkObject) computeRecursiveIGFloat 

func (eo *EnhlinkObject) computeRecursiveIGFloat(xgiArr []uint, ygiMap map[uint]bool, bestScoreMap map[uint]float64, depth, lenXgi int)

func (*EnhlinkObject) computeRecursiveInformationGain 

func (eo *EnhlinkObject) computeRecursiveInformationGain(xgiArr []uint, ygiMap map[uint]bool, bestScoreMap map[uint]float64, depth, lenXgi int)

func (*EnhlinkObject) computeTrees 

func (eo *EnhlinkObject) computeTrees()

computeTrees Compute tree

func (*EnhlinkObject) computeTreesCovar 

func (eo *EnhlinkObject) computeTreesCovar()

func (*EnhlinkObject) computeTreesOneThreads 

func (eo *EnhlinkObject) computeTreesOneThreads(cluster string, ygiMap map[uint]bool, scoreArr map[uint][]float64)

computeTreesOneThreads Compute tree for one bootstrap index

func (*EnhlinkObject) computeTreesSim 

func (eo *EnhlinkObject) computeTreesSim(ygiMap map[uint]bool)

computeTreesSim Compute tree using simulated variables

func (*EnhlinkObject) createYgiMapForCovar 

func (eo *EnhlinkObject) createYgiMapForCovar(ygiToFocus uint, validYgi, validCovar []uint) (ygiMap map[uint]bool)

func (*EnhlinkObject) deferCloseFiles 

func (eo *EnhlinkObject) deferCloseFiles()

func (*EnhlinkObject) defineBoolYgiVectorFromPeakMat 

func (eo *EnhlinkObject) defineBoolYgiVectorFromPeakMat(intervals []interval.IntInterface)

func (*EnhlinkObject) defineClusterFloatYgiSum 

func (eo *EnhlinkObject) defineClusterFloatYgiSum()

defineClusterFloatYgiSum define the nb of xgi

func (*EnhlinkObject) defineClusterYgiSum 

func (eo *EnhlinkObject) defineClusterYgiSum()

defineClusterYgiSum define the nb of xgi

func (*EnhlinkObject) defineYgiVectorFromFloatMat 

func (eo *EnhlinkObject) defineYgiVectorFromFloatMat(gene string) (isValid bool)

defineYgiVectorFromFloatMat defines the endog ygi vectors using the gene float mat. Returns whether the vector is valid

func (*EnhlinkObject) defineYgiVectorFromGeneMat 

func (eo *EnhlinkObject) defineYgiVectorFromGeneMat(gene string) (isValid bool)

defineYgiVectorFromGeneMat define the endog ygi vectors using the gene mat. return if the vector is valid

func (*EnhlinkObject) defineYgiVectorFromPeakMat 

func (eo *EnhlinkObject) defineYgiVectorFromPeakMat(targetPeak utils.Peak) (isValid bool)

defineYgiVectorFromPeakMat define the endog ygi vectors using the peak mat. return if the vector is valid

func (*EnhlinkObject) getIGFloat 

func (eo *EnhlinkObject) getIGFloat(xgiArr *[]uint, ygi uint) (infGainScore float64)

getIGFloat return weighted Information gain for float ygi vector. Dichotomize ygi using nonNullMean and compute IG. The final score is IG x non-null ygi ratio x non-null feature ratio

func (*EnhlinkObject) getInformationGain 

func (eo *EnhlinkObject) getInformationGain(xgiArr *[]uint, ygi uint) (infGainScore float64)

getInformationGain returns the weighted Information gain for an integer ygi vector. The final score is IG x non-null ygi ratio x non-null feature ratio

func (*EnhlinkObject) initIntervals 

func (eo *EnhlinkObject) initIntervals()

initIntervals init (*eo).YgiInterval. If (*eo).rmPeaksInPromoters is true, remove from index ygis intersecting promoters

func (*EnhlinkObject) initRandomYgiFor2ndOrder 

func (eo *EnhlinkObject) initRandomYgiFor2ndOrder(totnbRealFeat int)

func (*EnhlinkObject) initSimFloatMat 

func (eo *EnhlinkObject) initSimFloatMat()

func (*EnhlinkObject) initSimMat 

func (eo *EnhlinkObject) initSimMat()

func (*EnhlinkObject) initSimWriter 

func (eo *EnhlinkObject) initSimWriter()

func (*EnhlinkObject) initSurroundingEnhancersMat 

func (eo *EnhlinkObject) initSurroundingEnhancersMat(peak utils.Peak)

initSurroundingEnhancersMat

func (*EnhlinkObject) initWriters 

func (eo *EnhlinkObject) initWriters()

func (*EnhlinkObject) initWritersWithHeader 

func (eo *EnhlinkObject) initWritersWithHeader()

func (*EnhlinkObject) initYgiVectCovar 

func (eo *EnhlinkObject) initYgiVectCovar(ygi uint)

func (*EnhlinkObject) initbucketCovariates 

func (eo *EnhlinkObject) initbucketCovariates()

func (*EnhlinkObject) writePvals 

func (eo *EnhlinkObject) writePvals(pvals []pvalPoint, cluster string)

func (*EnhlinkObject) writePvals2ndOrder 

func (eo *EnhlinkObject) writePvals2ndOrder(pvals []pvalPoint, cluster string, currentYgi uint)

func (*EnhlinkObject) writePvalsSim 

func (eo *EnhlinkObject) writePvalsSim(pvals []pvalPoint, cluster string)

type LinkType 

LinkType type of link to keep from {“all”, “positive”, “negative”}

// LinkType is the string-backed type naming which links to keep.
type LinkType string

// Accepted LinkType values: "all", "positive" and "negative".
const (
    allLink LinkType = "all"
    posLink LinkType = "positive"
    negLink LinkType = "negative"
)

func (LinkType) IsValid 

func (t LinkType) IsValid() LinkType

IsValid is the link type valid

type MaxFeaturesType 

MaxFeaturesType max features type

// MaxFeaturesType holds a max-features setting in three forms: a string, a
// float and an int.
// NOTE(review): presumably only one of the three is meaningful at a time,
// depending on the value given to Set (string keyword, fraction or absolute
// count) — confirm against Set and check.
type MaxFeaturesType struct {
    mfString string
    fracFeat float64
    nbFeat   int
}

func (*MaxFeaturesType) SelectFeatures 

func (mf *MaxFeaturesType) SelectFeatures(ygiMap map[uint]bool) map[uint]bool

SelectFeatures create feature map according to the strategy chosen

func (*MaxFeaturesType) Set 

func (mf *MaxFeaturesType) Set(v string) error

Set set value

func (*MaxFeaturesType) String 

func (mf *MaxFeaturesType) String() string

func (*MaxFeaturesType) check 

func (mf *MaxFeaturesType) check()

type PromoterList 

PromoterList map[geneID] -> list of peaks

type PromoterList map[string][]utils.Peak

func LoadPromotersFile 

func LoadPromotersFile(fname utils.Filename) (plist PromoterList)

LoadPromotersFile load the promoter file

func (*PromoterList) Len 

func (pl *PromoterList) Len() int

Len return length

type TreeAttributes 

TreeAttributes attributes for enhlink

type TreeAttributes struct {
    // Number of internal threads to perform the multiple tasks
    NbThreads int
    // Remove peaks within promoter boundaries
    RmPeaksInPromoters bool
    // region in number of base pairs to define the surrounding enhancers
    SurroundingSize int
    // Min matrix size
    MinMatSize int
    // Max depth
    MaxDepth int
    // min leaf size of the tree
    MinLeafSize int
    // Number of bootstraps
    NBboot int
    // P-value threshold
    Threshold float64
    // Downsample the number of samples
    Downsample int
    // output directory and files tag
    OutDir, OutTag string
    // Maximum number of explanatory features per bootstrap model.
    MaxNbFeatures int
    // Maximum number of explanatory features per bootstrap model for second order models.
    SecondOrderMaxFeat int
    // Number of simulated features to use
    NbSimFeat int
    // Poisson parameter to control the amount of dropouts of the simulated variables
    Lambda1 float64
    // Poisson parameter to control the amount of false positives of the simulated variables
    Lambda2 float64
    // Keep the main ColMat matrix sparse. Useful for memory reasons if the background is very large
    KeepSparse bool
    // Which links to keep: {"all", "positive", "negative"}
    LinkType    LinkType
    // Maximum number of features to be considered for a given tree: {"all", "sqrt", "log"}, or an int / float value
    MaxFeatType MaxFeaturesType
    // only perform simulation
    OnlySim bool
    // Identify the covariates associated with each inferred enhancer-promoter links
    SecondOrder bool
    // Ignore Enhancers weight (the ratio of accessibility) in the computation of the modified Information Gain
    IgnoreEnhancerWeight bool
    // For each tree, randomly sample the cells to have a uniform covariate distribution
    UniformSampling bool
    //////////////// Arguments used only for header writing ////////////
    Version           string
    MatAttr, GmatAttr matrix.Attributes
    // mergingCutoff only used for header writing
    MergingCutoff int
    IsGeneExpr    bool
    //// Files ////
    PromoterFile, Metadata utils.Filename

    // verbose
    Verbose bool
}

type pvalPoint 

type pvalPoint struct {
    pval, fdr, score float64
    index            uint
    isValid          bool
}

enhtools

import "gitlab.com/Grouumf/enhlinktools/enhtools"

Library that compiles the enhtools executable

enhtools intersects bedpe files and computes accuracy metrics (TPR, FPR, F-score…)

USAGE:

# with -intersect (Intersection of output directory obtained from enhlink)
enhtools -intersect -in <directory> -in2 <directory> -out <directory> (optional: -tag/tag2/outtag <string> -scorePos/pvalPos/geneIDPos <string> -mergeScore {left, right, mean} -prec <float> )

# with -intersect2 (Intersection of two bedpe files)
enhtools -intersect2 -in <bedpe file> -in2 <bedpe file> -out <directory> (optional: -tag/tag2/outtag <string> -scorePos/pvalPos/geneIDPos <string> -mergeScore {left, right, mean} -prec <float> -stdout)

# with -intersect3 (Intersect input bedpe files, based on imperfect matches. A match occurs if both regions of the 2 bedpe files intersect respectively)
enhtools -intersect3 -in <fileRef>  -in2 <fileTarget> -out <file> (optional:  -stdout)

# with -intersect4 (Intersect input bedpe files, based on imperfect matches. A match occurs if at least one of the two regions of the first bedpe file intersects with one of the two regions of the second bedpe file)
enhtools -intersect4 -in <fileRef>  -in2 <fileTarget> -out <file> (optional: -stdout -scorePos/pvalPos <string>)

# with -diff (Difference (file1 - file2) between the input bedpe files, based on imperfect matches. A match occurs if both regions of the 2 bedpe files intersect respectively)
	enhtools -diff -in <fileRef>  -in2 <fileTarget> -out <file> (optional: -stdout)

        # with -filter (filter out links from the bedpe file (-in) that are not within at least one of the regions defined in -bed. If -diff is added, only the links not within any region of -bed are output. -filter is well suited for filtering links not in TAD regions defined in a BED file)
        enhtools -filter -in <bedpe file> -bed <bed file> (optional: -stdout -diff)

1

TIPS: scorePos/pvalPos/geneIDPos can be used to set different column IDs for in and in2. Use “:” to separate the column IDs for the files from in and in2

Index

Variables
func checkIfLineCanBeSplitIntoPeaks(line, sep string, peakPos []int, peakMax, nbPeaks int)
func filterBedpe()
func filterWithBed(bedpeFile, bedFile utils.Filename, outFile string)
func getPosFromOption(ps, option string, left bool) (pos []int)
func incompleteIntervalIntersect(file1, file2 utils.Filename, outFile string)
func intersectBedpeWithBedFile()
func intersectBedpes()
func intersectBedpes2()
func intersectBedpes3()
func intersectBedpes4()
func intersectOneInput(file1, file2, clTag string, waiting *sync.WaitGroup)
func intersectWithBed(bedpeFile, bedFile utils.Filename, outFile string)
func intervalIntersect(file1, file2 utils.Filename, outFile string)
func isIntersecting(peak1, peak2 utils.Peak) bool
func loadPeakFile(fname utils.Filename, sepIn, sepOut string, isLeft bool) (promPeakDict twoKeysBoolMap, pairsMeta metaMap, promGeneMap map[string]string)
func main()
func writeHeader(writer *io.WriteCloser)
func writeOneInterToBuffer(buffer *bytes.Buffer, meta1, meta2 peakMeta, enh1, prom1 string)
func writeStats(foutStat, clTag string, genestats geneStatsMap)
type geneStats
type geneStatsMap
- func (gsm *geneStatsMap) Init()
- func (gsm *geneStatsMap) incLeft(gene string, inc int)
- func (gsm *geneStatsMap) incRight(gene string, inc int)
- func (gsm *geneStatsMap) incTwoSides(gene string, inc int)
type intervalResults
- func loadTree(bedpeFile utils.Filename) (treeResult intervalResults)
type matching
- func (t matching) isValid() matching
type mergeFunc
type metaMap
type peakMeta
type peakPair
type scoreMerging
- func (sm *scoreMerging) check(mergingType string)
- func (sm *scoreMerging) merge(score1, score2 float64, mergingType string) float64
type twoKeysBoolMap

Variables

BEDFILE bed file containing regions used to filter links

var BEDFILE string

DIFF bedpe difference for intersect3

var DIFF bool

FILTER filter bedpe if they are not within one of the region defined in the bed file

var FILTER bool

GENESID bedpe column ID(s) for the gene ID

var GENESID string

INPUT1 output directory

var INPUT1 string

INPUT2 output directory

var INPUT2 string

INTAG output files tag

var INTAG string

INTAG2 output files tag

var INTAG2 string

INTERSECT intersect inputs

var INTERSECT bool

INTERSECT2 intersect inputs

var INTERSECT2 bool

INTERSECT3 intersect inputs based on imperfect matches: a match occurs if both regions of the 2 bedpe files intersect

var INTERSECT3 bool

INTERSECT4 Intersect input bedpe files, based on imperfect matches. A match occurs if at least one of the two regions of the first bedpe file intersects with one of the two regions of the second bedpe file

var INTERSECT4 bool

INTERSECTWITHBED Intersect bedpe with bed (at least one region of the bedpe intersects one of the BED regions)

var INTERSECTWITHBED bool

MATCHING matching type for -intersectBed: “either” (default), “left” (left region of the bed), “right” (right region), or “both” (both regions match)

var MATCHING string

MATCHINGTYPE possible matching options

var MATCHINGTYPE = [...]matching{either, left, right, both}

MERGINGTYPE how to merge scores

var MERGINGTYPE string

OUTDIR output directory

var OUTDIR string

OUTTAG output files tag

var OUTTAG string

PREC precision

var PREC int

PVALSPOS bedpe column ID(s) for the pvals

var PVALSPOS string

SCORESPOS column ID(s) for the scores

var SCORESPOS string

SHOWVERSION show version and quit

var SHOWVERSION bool

STDOUT write output to stdout

var STDOUT bool

func checkIfLineCanBeSplitIntoPeaks 

func checkIfLineCanBeSplitIntoPeaks(line, sep string, peakPos []int, peakMax, nbPeaks int)

func filterBedpe 

func filterBedpe()

func filterWithBed 

func filterWithBed(bedpeFile, bedFile utils.Filename, outFile string)

func getPosFromOption 

func getPosFromOption(ps, option string, left bool) (pos []int)

func incompleteIntervalIntersect 

func incompleteIntervalIntersect(file1, file2 utils.Filename, outFile string)

func intersectBedpeWithBedFile 

func intersectBedpeWithBedFile()

func intersectBedpes 

func intersectBedpes()

func intersectBedpes2 

func intersectBedpes2()

func intersectBedpes3 

func intersectBedpes3()

func intersectBedpes4 

func intersectBedpes4()

func intersectOneInput 

func intersectOneInput(file1, file2, clTag string, waiting *sync.WaitGroup)

func intersectWithBed 

func intersectWithBed(bedpeFile, bedFile utils.Filename, outFile string)

func intervalIntersect 

func intervalIntersect(file1, file2 utils.Filename, outFile string)

func isIntersecting 

func isIntersecting(peak1, peak2 utils.Peak) bool

func loadPeakFile 

func loadPeakFile(fname utils.Filename, sepIn, sepOut string, isLeft bool) (promPeakDict twoKeysBoolMap, pairsMeta metaMap, promGeneMap map[string]string)

func main 

func main()

main main function

func writeHeader 

func writeHeader(writer *io.WriteCloser)

func writeOneInterToBuffer 

func writeOneInterToBuffer(buffer *bytes.Buffer, meta1, meta2 peakMeta, enh1, prom1 string)

func writeStats 

func writeStats(foutStat, clTag string, genestats geneStatsMap)

type geneStats 

type geneStats struct {
    total, left, right int
    leftHit, rightHit  int
}

type geneStatsMap 

type geneStatsMap struct {
    smap map[string]geneStats
    all  geneStats
}

func (*geneStatsMap) Init 

func (gsm *geneStatsMap) Init()

func (*geneStatsMap) incLeft 

func (gsm *geneStatsMap) incLeft(gene string, inc int)

func (*geneStatsMap) incRight 

func (gsm *geneStatsMap) incRight(gene string, inc int)

func (*geneStatsMap) incTwoSides 

func (gsm *geneStatsMap) incTwoSides(gene string, inc int)

type intervalResults 

type intervalResults struct {
    chrIntervalTree map[string]*interval.IntTree
    intervalMapping map[uintptr]utils.Peak
    metaMapping     map[uintptr][]string
}

func loadTree 

func loadTree(bedpeFile utils.Filename) (treeResult intervalResults)

type matching 

matching type

type matching string

const (
    // matching type
    either matching = "either"
    both   matching = "both"
    left   matching = "left"
    right  matching = "right"
)

func (matching) isValid 

func (t matching) isValid() matching

isValid is the matching type valid

type mergeFunc 

type mergeFunc func(score1, score2 float64) float64

type metaMap 

type metaMap map[peakPair]peakMeta

type peakMeta 

type peakMeta struct {
    pvals, scores []float64
    gene          string
}

type peakPair 

type peakPair [2]string

type scoreMerging 

type scoreMerging struct {
    mergingType string
    isInit      bool
    mfunc       mergeFunc
}

func (*scoreMerging) check 

func (sm *scoreMerging) check(mergingType string)

func (*scoreMerging) merge 

func (sm *scoreMerging) merge(score1, score2 float64, mergingType string) float64

type twoKeysBoolMap 

type twoKeysBoolMap map[string]map[string]bool

matrix

import "gitlab.com/Grouumf/enhlinktools/matrix"

Package matrix is a library to load sparse matrices from single-cell data and/or mtx and COO format and create efficient indexing.

Index

Constants
Variables
func GetRandomBootstrapIndex(arr []uint, downsample int) (index []uint)
func LoadIndexFileToIndex(fname utils.Filename, downcase bool, refMapping map[string]uint) (celliddict map[string]uint, maxIndex int)
func LoadPeakDictsToIndex(fname utils.Filename, sepIn, sepOut string) (featiddict map[string]uint, maxIndex int)
func Mean(arr []float64, size int) (mean float64)
func Std(arr []float64, mean float64, size int) (std float64)
func TestStringToPeak(str string) error
func TtestPval(mean, std float64, size int) (pval float64)
func diffMap(m1, m2 map[string]uint) (missing []string)
func maxUintMap(imap map[string]uint) (maxMap int)
func minIndexSliceInt(slice []int, validIndexes []int) int
func minInt(a, b int) int
func processMtxHeader(ismtx, transpose bool, reader *bufio.Scanner, maxLengthX, maxLengthY int, xgi, ygi utils.Filename) (splitChar string)
func reverseIndex(index map[string]uint, lenIndex int) (indexC []string)
func reverseIndexC(indexC []string) (index map[string]uint)
type Attributes
type Format
- func (t Format) isValid() Format
type MatColFloatHash
- func (mc *MatColFloatHash) GetRow(ygi uint) map[uint]float64
- func (mc *MatColFloatHash) Init(matCol []map[uint]float64, xDim uint)
type MatColHash
- func (mc *MatColHash) Get(ygi, xgi uint) bool
- func (mc *MatColHash) GetCol(ygi uint) []bool
- func (mc *MatColHash) GetDim() (xDim, yDim int)
- func (mc *MatColHash) GetIndex(ygi uint) uint
- func (mc *MatColHash) GetRow(ygi uint) map[uint]bool
- func (mc *MatColHash) GetRowDense(ygi uint) (vect []bool)
- func (mc *MatColHash) Init(matCol *[]map[uint]bool, xDim uint)
- func (mc *MatColHash) InitDense(matColDense [][]bool)
- func (mc *MatColHash) IsDense() bool
- func (mc *MatColHash) Len(ygiIndex uint) int
- func (mc *MatColHash) RmDense()
- func (mc *MatColHash) ToDense()
- func (mc *MatColHash) ToDenseFromSubset(ygis []uint) (newYgis []uint)
- func (mc *MatColHash) ToDenseFromSubsetAlreadyLoaded(ygis []uint) (newYgis []uint)
type SparseBoolMatrix
- func (sbm *SparseBoolMatrix) CreateRandMat(nbFeat int, refFeats []uint)
- func (sbm *SparseBoolMatrix) GetMatColT() []map[uint]bool
- func (sbm *SparseBoolMatrix) GetUniformSampling(downsample, totXgi int, matColBucket [][]uint) (xgiIndex []uint)
- func (sbm *SparseBoolMatrix) Init(attributes Attributes)
- func (sbm *SparseBoolMatrix) Init2(attributes Attributes)
- func (sbm *SparseBoolMatrix) InitMeta(xgiMap map[string]uint, attributes Attributes, skipFirst bool)
- func (sbm *SparseBoolMatrix) InitTranspose()
- func (sbm *SparseBoolMatrix) LoadClustersFile()
- func (sbm *SparseBoolMatrix) LoadMatrix()
- func (sbm *SparseBoolMatrix) LoadMatrix2(xgiMap, ygiMap map[string]uint)
- func (sbm *SparseBoolMatrix) initThreading(attributes Attributes)
- func (sbm *SparseBoolMatrix) loadMatrixCoo(matFormat Format, xgiSubset, ygiSubset map[string]uint)
- func (sbm *SparseBoolMatrix) loadMatrixCooOneTh(count, nblines, thID int, lines *[buffsize]string, xgiSubset, ygiSubset map[string]uint, matColMain *[]map[uint]bool, splitChar string, transpose, delOne bool)
- func (sbm *SparseBoolMatrix) loadMetaMatrix(xgiMap map[string]uint, skipFirst bool)
type SparseFloatMatrix
- func (sfm *SparseFloatMatrix) Init(attributes Attributes)
- func (sfm *SparseFloatMatrix) LoadMatrix(xgiMap, ygiMap map[string]uint)
- func (sfm *SparseFloatMatrix) initThreading(attributes Attributes)
- func (sfm *SparseFloatMatrix) loadMatrixFloat(xgiSubset, ygiSubset map[string]uint)
- func (sfm *SparseFloatMatrix) loadMatrixFloatOneTh(count, nblines, thID int, lines *[buffsize]string, xgiSubset, ygiSubset map[string]uint, matColMain *[]map[uint]float64, splitChar string, transpose, delOne bool)

Constants

const (
    // Matrix format
    coo        Format = "coo"
    mtx        Format = "mtx"
    cellRanger Format = "cellRanger"
    buffsize   int    = 120000
    nbSteps    int    = 100
)

Variables

MATRIXFORMATS possible options for matrix format

var MATRIXFORMATS = [...]Format{coo, mtx, cellRanger}

func GetRandomBootstrapIndex 

func GetRandomBootstrapIndex(arr []uint, downsample int) (index []uint)

GetRandomBootstrapIndex get a random index with repetition

func LoadIndexFileToIndex 

func LoadIndexFileToIndex(fname utils.Filename, downcase bool, refMapping map[string]uint) (celliddict map[string]uint, maxIndex int)

LoadIndexFileToIndex create cell ID index dict. Return also max Index

func LoadPeakDictsToIndex 

func LoadPeakDictsToIndex(fname utils.Filename, sepIn, sepOut string) (featiddict map[string]uint, maxIndex int)

LoadPeakDictsToIndex create feature (peak) ID index dict. Return also max Index

func Mean 

func Mean(arr []float64, size int) (mean float64)

Mean return mean of arr given total size

func Std 

func Std(arr []float64, mean float64, size int) (std float64)

Std return std of arr given total size

func TestStringToPeak 

func TestStringToPeak(str string) error

TestStringToPeak test if string is a valid peak

func TtestPval 

func TtestPval(mean, std float64, size int) (pval float64)

TtestPval return Student test pval using T CDF

func diffMap 

func diffMap(m1, m2 map[string]uint) (missing []string)

func maxUintMap 

func maxUintMap(imap map[string]uint) (maxMap int)

func minIndexSliceInt 

func minIndexSliceInt(slice []int, validIndexes []int) int

func minInt 

func minInt(a, b int) int

func processMtxHeader 

func processMtxHeader(ismtx, transpose bool, reader *bufio.Scanner, maxLengthX, maxLengthY int, xgi, ygi utils.Filename) (splitChar string)

func reverseIndex 

func reverseIndex(index map[string]uint, lenIndex int) (indexC []string)

reverseIndex Internal function to reverse a map index

func reverseIndexC 

func reverseIndexC(indexC []string) (index map[string]uint)

reverseIndexC Internal function to reverse an index

type Attributes 

Attributes matrix attributes parsed during init

type Attributes struct {
    Xgi          utils.Filename
    Ygi          utils.Filename
    MatFile      utils.Filename
    XgiSubset    utils.Filename
    YgiSubset    utils.Filename
    ClustersFile utils.Filename
    MatrixFormat string
    NbThreads    int
}

type Format 

Format matrix format type

type Format string

func (Format) isValid 

func (t Format) isValid() Format

isValid is the matrix format valid

type MatColFloatHash 

MatColFloatHash matrix column class for sparse float matrix

type MatColFloatHash struct {
    // Column matrix mat[ygi][xgi]
    matCol []map[uint]float64
    // Dense column matrix mat[ygi][xgi]
    // Index
    subIndexHash map[int]uint
    xDim, yDim   uint
}

func (*MatColFloatHash) GetRow 

func (mc *MatColFloatHash) GetRow(ygi uint) map[uint]float64

GetRow get row from matCol using a sparse map[uint]float64

func (*MatColFloatHash) Init 

func (mc *MatColFloatHash) Init(matCol []map[uint]float64, xDim uint)

Init init MatColFloatHash

type MatColHash 

MatColHash matrix column class that can allocate dense submatrices

type MatColHash struct {
    // Column matrix mat[ygi][xgi]
    matCol *[]map[uint]bool
    // Dense column matrix mat[ygi][xgi]
    matColDense [][]bool
    // Index
    subIndexHash map[int]uint
    xDim         uint
    isDense      bool
}

func (*MatColHash) Get 

func (mc *MatColHash) Get(ygi, xgi uint) bool

Get get matrix value

func (*MatColHash) GetCol 

func (mc *MatColHash) GetCol(ygi uint) []bool

GetCol get matrix column as a dense bool vector

func (*MatColHash) GetDim 

func (mc *MatColHash) GetDim() (xDim, yDim int)

GetDim return dimensions

func (*MatColHash) GetIndex 

func (mc *MatColHash) GetIndex(ygi uint) uint

GetIndex get index from hashed ygi

func (*MatColHash) GetRow 

func (mc *MatColHash) GetRow(ygi uint) map[uint]bool

GetRow get row from matCol using a sparse map[uint]bool

func (*MatColHash) GetRowDense 

func (mc *MatColHash) GetRowDense(ygi uint) (vect []bool)

GetRowDense return row vector as a dense bool array. If matrix is not dense, construct the vector

func (*MatColHash) Init 

func (mc *MatColHash) Init(matCol *[]map[uint]bool, xDim uint)

Init init MatColHash

func (*MatColHash) InitDense 

func (mc *MatColHash) InitDense(matColDense [][]bool)

InitDense init MatColHash with a dense matrix

func (*MatColHash) IsDense 

func (mc *MatColHash) IsDense() bool

IsDense return if struct dense is initiated

func (*MatColHash) Len 

func (mc *MatColHash) Len(ygiIndex uint) int

Len Return the number of non-zero elements of a column

func (*MatColHash) RmDense 

func (mc *MatColHash) RmDense()

RmDense remove dense matrix if any

func (*MatColHash) ToDense 

func (mc *MatColHash) ToDense()

ToDense sparse to dense

func (*MatColHash) ToDenseFromSubset 

func (mc *MatColHash) ToDenseFromSubset(ygis []uint) (newYgis []uint)

ToDenseFromSubset sparse to dense

func (*MatColHash) ToDenseFromSubsetAlreadyLoaded 

func (mc *MatColHash) ToDenseFromSubsetAlreadyLoaded(ygis []uint) (newYgis []uint)

ToDenseFromSubsetAlreadyLoaded sparse to dense but does not recreate matColDense because already loaded (used when neighborhood == 0)

type SparseBoolMatrix 

SparseBoolMatrix class

type SparseBoolMatrix struct {
    // Input files
    xgi, ygi, matFile, clustersFile utils.Filename
    xgiSubset, ygiSubset            utils.Filename
    matrixFormat                    Format

    XgiIndex, YgiIndex   []string
    XgiIndexC, YgiIndexC map[string]uint
    Clusters             map[string][]uint // cluster key -> list of cell IDs
    Xdim, Ydim           int               // Dimensions of the matrix

    MatCol     MatColHash // mat.Get(posy, posx)
    RandMatCol MatColHash // mat[posy][posx] with random posx from

    matCol, matColT []map[uint]bool // Original matCol value and passed as reference to MatCol. MatcolT is the transpose

    ////////  Sync utils ////////
    nbThreads int
    waiting   sync.WaitGroup
    guard     chan int
    mutex     sync.Mutex
}

func (*SparseBoolMatrix) CreateRandMat 

func (sbm *SparseBoolMatrix) CreateRandMat(nbFeat int, refFeats []uint)

CreateRandMat Create a random matrix of size nbFeat x len(XgiIndex)

func (*SparseBoolMatrix) GetMatColT 

func (sbm *SparseBoolMatrix) GetMatColT() []map[uint]bool

GetMatColT Get MatColT

func (*SparseBoolMatrix) GetUniformSampling 

func (sbm *SparseBoolMatrix) GetUniformSampling(downsample, totXgi int, matColBucket [][]uint) (xgiIndex []uint)

GetUniformSampling get a uniform sampling of the xgi indexes according to the ygi

func (*SparseBoolMatrix) Init 

func (sbm *SparseBoolMatrix) Init(attributes Attributes)

Init Init dedicated to the gene matrix without loading the cluster file. The ygi index is regarded as peak region and the Clusters file is loaded

func (*SparseBoolMatrix) Init2 

func (sbm *SparseBoolMatrix) Init2(attributes Attributes)

Init2 Init dedicated to the gene matrix without loading the cluster file. The ygi index is not regarded as peak region and is loaded with LoadIndexFileToIndex

func (*SparseBoolMatrix) InitMeta 

func (sbm *SparseBoolMatrix) InitMeta(xgiMap map[string]uint, attributes Attributes, skipFirst bool)

InitMeta init Metadata matrix, drop first binary attributes

func (*SparseBoolMatrix) InitTranspose 

func (sbm *SparseBoolMatrix) InitTranspose()

InitTranspose create a transpose matrix of matCol and instantiate matColBucket

func (*SparseBoolMatrix) LoadClustersFile 

func (sbm *SparseBoolMatrix) LoadClustersFile()

LoadClustersFile load cluster file for sparse matrix

func (*SparseBoolMatrix) LoadMatrix 

func (sbm *SparseBoolMatrix) LoadMatrix()

LoadMatrix load matrix

func (*SparseBoolMatrix) LoadMatrix2 

func (sbm *SparseBoolMatrix) LoadMatrix2(xgiMap, ygiMap map[string]uint)

LoadMatrix2 load matrix with xgi and ygi Index. If ygiMap is empty, use the default ygi index

func (*SparseBoolMatrix) initThreading 

func (sbm *SparseBoolMatrix) initThreading(attributes Attributes)

func (*SparseBoolMatrix) loadMatrixCoo 

func (sbm *SparseBoolMatrix) loadMatrixCoo(matFormat Format, xgiSubset, ygiSubset map[string]uint)

loadMatrixCoo load function with either MTX header or not. if xgiSubset is provided, replace xgi index by the index present in xgiSubset

func (*SparseBoolMatrix) loadMatrixCooOneTh 

func (sbm *SparseBoolMatrix) loadMatrixCooOneTh(count, nblines, thID int, lines *[buffsize]string, xgiSubset, ygiSubset map[string]uint, matColMain *[]map[uint]bool, splitChar string, transpose, delOne bool)

func (*SparseBoolMatrix) loadMetaMatrix 

func (sbm *SparseBoolMatrix) loadMetaMatrix(xgiMap map[string]uint, skipFirst bool)

Load meta file with a header and in dense tsv format. If skipFirst, the first value of each field is skipped to avoid a singular matrix

type SparseFloatMatrix 

SparseFloatMatrix class

type SparseFloatMatrix struct {
    // Input files
    xgi, ygi, matFile    utils.Filename
    xgiSubset, ygiSubset utils.Filename
    matrixFormat         Format

    XgiIndex, YgiIndex   []string
    XgiIndexC, YgiIndexC map[string]uint
    Xdim, Ydim           int // Dimensions of the matrix

    MatCol MatColFloatHash // mat.Get(posy, posx)

    ////////  Sync utils ////////
    nbThreads int
    waiting   sync.WaitGroup
    guard     chan int
    mutex     sync.Mutex
}

func (*SparseFloatMatrix) Init 

func (sfm *SparseFloatMatrix) Init(attributes Attributes)

Init Init dedicated to the gene matrix without loading the cluster file. The ygi index is regarded as peak region and the Clusters file is loaded

func (*SparseFloatMatrix) LoadMatrix 

func (sfm *SparseFloatMatrix) LoadMatrix(xgiMap, ygiMap map[string]uint)

LoadMatrix load float matrix with xgi and ygi Index. If ygiMap is empty, use the default ygi index

func (*SparseFloatMatrix) initThreading 

func (sfm *SparseFloatMatrix) initThreading(attributes Attributes)

func (*SparseFloatMatrix) loadMatrixFloat 

func (sfm *SparseFloatMatrix) loadMatrixFloat(xgiSubset, ygiSubset map[string]uint)

loadMatrixFloat load function with either MTX header or not. if xgiSubset is provided, replace xgi index by the index present in xgiSubset

func (*SparseFloatMatrix) loadMatrixFloatOneTh 

func (sfm *SparseFloatMatrix) loadMatrixFloatOneTh(count, nblines, thID int, lines *[buffsize]string, xgiSubset, ygiSubset map[string]uint, matColMain *[]map[uint]float64, splitChar string, transpose, delOne bool)

Generated by gomarkdoc

enhgrid

Library that compiles the enhgrid executable

Index

Variables

func analyseOneGeneList

func getGeneBucketsFromGene

func getGeneBucketsFromPromoter

func launchOneIterThread

func main

func mergeBucketResultsFile

func mergeOneSetOfBucketFiles

func processNGeneLists

func reduce

func splitGenesToBucket

func stringToFloatArray

func stringToIntArray

func stringToMaxFeatTypeArray

func testIfRequiredFilesExist

type paramArrays

func (*paramArrays) generateAllCombination

func (*paramArrays) init

func (*paramArrays) initIterators

func (*paramArrays) iter

func (*paramArrays) returnLastThreadVal

enhlink

Library that compiles the enhlink executable

Index

Variables

func main

func testIfRequiredFilesExist

enhlinkobject

Index

Variables

func AssertIfFileExists

func MergeClosePromoterRegions

func pickNGenesAtRandom

type EnhlinkObject

func (*EnhlinkObject) AnalyseAllGenesFromGeneMat

func (*EnhlinkObject) AnalyseAllPromoters

func (*EnhlinkObject) AnalyseNGenes

func (*EnhlinkObject) AnalyseOneGene

func (*EnhlinkObject) AnalyseRandomSubsetFromGeneMat

func (*EnhlinkObject) AnalyseRandomSubsetOfPromoters

func (*EnhlinkObject) Init

func (*EnhlinkObject) analyseOneGene

func (*EnhlinkObject) blacklistAllPeaksInPromoter

func (*EnhlinkObject) computeOnePvalue

func (*EnhlinkObject) computePvalues

func (*EnhlinkObject) computeRecursiveIGFloat

func (*EnhlinkObject) computeRecursiveInformationGain

func (*EnhlinkObject) computeTrees

func (*EnhlinkObject) computeTreesCovar

func (*EnhlinkObject) computeTreesOneThreads

func (*EnhlinkObject) computeTreesSim

func (*EnhlinkObject) createYgiMapForCovar

func (*EnhlinkObject) deferCloseFiles

func (*EnhlinkObject) defineBoolYgiVectorFromPeakMat

func (*EnhlinkObject) defineClusterFloatYgiSum

func (*EnhlinkObject) defineClusterYgiSum

func (*EnhlinkObject) defineYgiVectorFromFloatMat

func (*EnhlinkObject) defineYgiVectorFromGeneMat

func (*EnhlinkObject) defineYgiVectorFromPeakMat

func (*EnhlinkObject) getIGFloat

func (*EnhlinkObject) getInformationGain

func (*EnhlinkObject) initIntervals

func (*EnhlinkObject) initRandomYgiFor2ndOrder

func (*EnhlinkObject) initSimFloatMat

func (*EnhlinkObject) initSimMat

func (*EnhlinkObject) initSimWriter

func (*EnhlinkObject) initSurroundingEnhancersMat

func (*EnhlinkObject) initWriters

func (*EnhlinkObject) initWritersWithHeader

func (*EnhlinkObject) initYgiVectCovar

func (*EnhlinkObject) initbucketCovariates

func (*EnhlinkObject) writePvals

func (*EnhlinkObject) writePvals2ndOrder

func (*EnhlinkObject) writePvalsSim

type LinkType

func (LinkType) IsValid

type MaxFeaturesType

func analyseOneGeneList 

func getGeneBucketsFromGene 

func getGeneBucketsFromPromoter 

func launchOneIterThread 

func main 

func mergeBucketResultsFile 

func mergeOneSetOfBucketFiles 

func processNGeneLists 

func reduce 

func splitGenesToBucket 

func stringToFloatArray 

func stringToIntArray 

func stringToMaxFeatTypeArray 

func testIfRequiredFilesExist 

type paramArrays 

func (*paramArrays) generateAllCombination 

func (*paramArrays) init 

func (*paramArrays) initIterators 

func (*paramArrays) iter 

func (*paramArrays) returnLastThreadVal 

func main 

func testIfRequiredFilesExist 

func AssertIfFileExists 

func MergeClosePromoterRegions 

func pickNGenesAtRandom 

type EnhlinkObject 

func (*EnhlinkObject) AnalyseAllGenesFromGeneMat 

func (*EnhlinkObject) AnalyseAllPromoters 

func (*EnhlinkObject) AnalyseNGenes 

func (*EnhlinkObject) AnalyseOneGene 

func (*EnhlinkObject) AnalyseRandomSubsetFromGeneMat 

func (*EnhlinkObject) AnalyseRandomSubsetOfPromoters 

func (*EnhlinkObject) Init 

func (*EnhlinkObject) analyseOneGene 

func (*EnhlinkObject) blacklistAllPeaksInPromoter 

func (*EnhlinkObject) computeOnePvalue 

func (*EnhlinkObject) computePvalues 

func (*EnhlinkObject) computeRecursiveIGFloat 

func (*EnhlinkObject) computeRecursiveInformationGain 

func (*EnhlinkObject) computeTrees 

func (*EnhlinkObject) computeTreesCovar 

func (*EnhlinkObject) computeTreesOneThreads 

func (*EnhlinkObject) computeTreesSim 

func (*EnhlinkObject) createYgiMapForCovar 

func (*EnhlinkObject) deferCloseFiles 

func (*EnhlinkObject) defineBoolYgiVectorFromPeakMat 

func (*EnhlinkObject) defineClusterFloatYgiSum 

func (*EnhlinkObject) defineClusterYgiSum 

func (*EnhlinkObject) defineYgiVectorFromFloatMat 

func (*EnhlinkObject) defineYgiVectorFromGeneMat 

func (*EnhlinkObject) defineYgiVectorFromPeakMat 

func (*EnhlinkObject) getIGFloat 

func (*EnhlinkObject) getInformationGain 

func (*EnhlinkObject) initIntervals 

func (*EnhlinkObject) initRandomYgiFor2ndOrder 

func (*EnhlinkObject) initSimFloatMat 

func (*EnhlinkObject) initSimMat 

func (*EnhlinkObject) initSimWriter 

func (*EnhlinkObject) initSurroundingEnhancersMat 

func (*EnhlinkObject) initWriters 

func (*EnhlinkObject) initWritersWithHeader 

func (*EnhlinkObject) initYgiVectCovar 

func (*EnhlinkObject) initbucketCovariates 

func (*EnhlinkObject) writePvals 

func (*EnhlinkObject) writePvals2ndOrder 

func (*EnhlinkObject) writePvalsSim 

type LinkType 

func (LinkType) IsValid 

type MaxFeaturesType 

func (*MaxFeaturesType) SelectFeatures 

func (*MaxFeaturesType) Set 

func (*MaxFeaturesType) String 

func (*MaxFeaturesType) check 

type PromoterList 

func LoadPromotersFile 

func (*PromoterList) Len 

type TreeAttributes 

type pvalPoint 

func checkIfLineCanBeSplitIntoPeaks 

func filterBedpe 