If you have installed Cell Ranger, you can use it to process FASTQ files from a Python notebook.

Function to run Cell Ranger

import gzip
import os
import shlex
import shutil
import subprocess
from subprocess import PIPE, STDOUT, Popen
def runCellRange(workspace,fastq_path,samples,expect_cells,transcriptpath,run_name):
    """Run ``cellranger count`` for each sample, plus ``cellranger aggr`` for several.

    A shell script containing the commands is written to
    ``<workspace>/<run_name>/<run_name>.sh`` and executed with ``bash``.
    When more than one sample is given, an aggregation CSV
    (``<run_name>.csv``) is written next to the script and a
    ``cellranger aggr --id aggr`` step is appended.

    Args:
        workspace: Existing directory in which the run directory is created.
        fastq_path: Existing directory passed to ``--fastqs``.
        samples: Sample names. Each one is used as both ``--id`` and
            ``--sample``. A single non-empty string is treated as one sample.
        expect_cells: Value passed to ``--expect-cells``.
        transcriptpath: Value passed to ``--transcriptome``.
        run_name: Name of the run directory; spaces are replaced by ``_``.

    Returns:
        The path ``<run dir>/<result>/outs/filtered_feature_bc_matrix/``
        where ``<result>`` is ``aggr`` for several samples or the sample name
        for one. Returns ``""`` when ``cellranger`` is not on ``PATH`` and
        ``None`` when an argument fails validation (a message is printed in
        both cases). The path is returned even if the script exits with a
        non-zero status; a warning is printed in that case.
    """
    run_name = run_name.replace(" ", "_")

    # Look the executable up on PATH rather than matching a shell's error
    # text: bash prints "command not found", other shells word it differently.
    if shutil.which("cellranger") is None:
        print("Please install cellranger 3.0")
        print("Tutorial: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/installation")
        print("If you have counts data. please skip this step.")
        return ""
    if not os.path.isdir(workspace):
        print("workspace is not a dir")
        return None

    # A bare string is one sample name, not a sequence of one-letter samples.
    if isinstance(samples, str):
        samples = [samples] if samples else []
    else:
        samples = [str(sample) for sample in (samples or [])]
    if not samples:
        print("Please input samples")
        return None
    if not os.path.isdir(fastq_path):
        print("fastq path is not a dir")
        return None

    res_path = os.path.join(workspace, run_name)
    if os.path.isdir(res_path):
        print("run name is already in workspace")
        return None
    os.mkdir(res_path)

    # Every user-supplied value is shell-quoted so that paths and names
    # containing spaces or shell metacharacters survive the script.
    script_lines = [
        "wd=" + shlex.quote(res_path),
        'cd "${wd}" || exit 1',
    ]
    for sample in samples:
        script_lines.append(" ".join([
            "cellranger count",
            "--id", shlex.quote(sample),
            "--fastqs", shlex.quote(str(fastq_path)),
            "--transcriptome", shlex.quote(str(transcriptpath)),
            "--expect-cells", shlex.quote(str(expect_cells)),
            "--sample=" + shlex.quote(sample),
        ]))

    if len(samples) > 1:
        # One CSV row per sample pointing at the molecule_info.h5 file that
        # the corresponding "cellranger count" job is expected to produce.
        csv_rows = ["library_id,molecule_h5"]
        csv_rows.extend(
            sample + "," + os.path.join(res_path, sample, "outs", "molecule_info.h5")
            for sample in samples
        )
        csv_path = os.path.join(res_path, run_name + ".csv")
        with open(csv_path, "w") as f:
            f.write("\n".join(csv_rows) + "\n")

        script_lines.append(
            "cellranger aggr --id aggr --csv=" + shlex.quote(csv_path)
            + " --normalize=mapped"
        )
        result_name = "aggr"
    else:
        result_name = samples[0]

    script_path = os.path.join(res_path, run_name + ".sh")
    with open(script_path, "w") as f:
        f.write("\n".join(script_lines) + "\n")

    print("it will take a few hours . please wait.....")
    completed = subprocess.run(
        ["bash", script_path],
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    print(completed.stdout.decode(errors="replace"))
    if completed.returncode != 0:
        print("warning: " + script_path + " exited with status " + str(completed.returncode))
    print("---------------------------------------------------------------------------------")
    print("finish")

    result_path = os.path.join(res_path, result_name)
    print("results path: " + result_path)
    # The trailing empty component keeps the trailing "/" on the returned path.
    counts_path = os.path.join(result_path, "outs", "filtered_feature_bc_matrix", "")
    print("counts file path: " + counts_path)

    return counts_path

Run Cell Ranger

import scpipeline
# Example invocation. Every value below is a placeholder to be replaced
# before running.
p = scpipeline.ProcessPipline()
cellranger_args = dict(
    workspace="/path/to/workspace",  # absolute address.
    fastq_path="/path/to/fastqfile",
    samples="",  # placeholder: fill in the sample names to process
    expect_cells=4000,  # cell ranger requirement
    transcriptpath="path to transcriptpath",
    run_name="test_run",  # result file will save to workspace+"/"+run_name
)
p.runCellRange(**cellranger_args)

How to run Cell Ranger from Notebook

If you have installed Cell Ranger, you can use it to process FASTQ files from a Python notebook.