Compute Differentially Expressed Genes
import requests
from pprint import pprint
import pandas as pd
import numpy as np
import diffxpy.api as de
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats
import time
import scanpy.api as sc
Your app URL with port
# Base URL (host and port) of the app. Endpoint paths are appended to it
# directly, so it must keep its trailing slash.
appUrl = "http://54.159.6.229:8002/"
Retrieve the expression matrix using the API
## HTTP API url (endpoint path is appended to the app base URL)
url = appUrl + "api/getNormalizedGeneExprByTwoClstrs"

## collect normalized counts data from two cell types: Tcells and Bcells
data = {
    "mapid": "5c8bf53ea05a37a5c7d707ce",
    "clstrType1": "cellType",
    "clstrName1": "Tcells",
    "clstrType2": "cellType",
    "clstrName2": "Bcells",
    "zscoreFilter": "",  # if set "", skip the z score filter
    "log2fc": "",  # if set "", skip the fold change filter
}

## use API
# A timeout keeps the notebook from hanging forever on an unreachable server;
# the value is generous because the response may be large -- adjust as needed.
response = requests.post(url, data=data, timeout=300)
# Fail here with a clear HTTP error instead of a confusing JSON/key error
# further down when the server answers with a 4xx/5xx status.
response.raise_for_status()
res = response.json()

# Number of top-level keys in the response (the code below treats each key
# as one gene).
pprint(len(res))
# create adata object
# Every key of `res` is used as a gene name. For each gene, res[gene][0] and
# res[gene][1] are concatenated into one row of the matrix.
# NOTE(review): presumably these two lists hold the per-cell values of the
# first and second requested cluster -- confirm against the API.
genes = list(res)

# Group sizes are read from the first gene only; this assumes every gene
# carries the same number of values per group.
key = genes[0]
g1len = len(res[key][0])
g2len = len(res[key][1])

# Group label per column of the matrix: 0 for the first list, 1 for the second.
condition = [0] * g1len + [1] * g2len

# Matrix is built genes x cells, labelled with gene names, then transposed
# so that the cells become the observations.
data = np.array(
    [res[gene][0] + res[gene][1] for gene in genes],
    dtype="float32",
)
adata = sc.AnnData(X=data)
adata.obs.index = genes
adata = adata.T
adata.obs["condition"] = condition
Run the DEG test using diffxpy from the Theis Lab.
One p-value is computed per gene.
# Wilcoxon test between the two groups encoded in adata.obs["condition"].
test_w = de.test.wilcoxon(data=adata, grouping="condition")

# Rank the per-gene results by q-value, breaking ties by log2 fold change
# (both ascending), and display the top rows.
deg_summary = test_w.summary()
deg_ranked = deg_summary.sort_values(by=["qval", "log2fc"], ascending=True)
deg_ranked.head()
The table above contains B-cell-specific genes compared to T cells.