Quickstart Tutorial

Tutorial

[2]:
import ipcoal
import toytree

1) Provide a species tree

[5]:
# init a model Class object for simulations
tree = toytree.rtree.unittree(8, treeheight=1e6)
[6]:
# draw tree showing idx labels
tree.draw(tree_style='c');
idx: 0 name: r4 dist: 200000 support: 100 height: 00idx: 1 name: r2 dist: 200000 support: 100 height: 01idx: 2 name: r0 dist: 400000 support: 100 height: 02idx: 3 name: r6 dist: 600000 support: 100 height: 03idx: 4 name: r7 dist: 800000 support: 100 height: 04idx: 5 name: r5 dist: 600000 support: 100 height: 05idx: 6 name: r1 dist: 600000 support: 100 height: 06idx: 7 name: r3 dist: 800000 support: 100 height: 07idx: 8 name: 8 dist: 200000 support: 100 height: 2000008idx: 9 name: 9 dist: 200000 support: 100 height: 4000009idx: 10 name: 10 dist: 200000 support: 100 height: 60000010idx: 11 name: 11 dist: 200000 support: 100 height: 60000011idx: 12 name: 12 dist: 200000 support: 100 height: 80000012idx: 13 name: 13 dist: 200000 support: 100 height: 80000013idx: 14 name: 14 dist: 200000 support: 100 height: 10000001405000001000000

2) Create a model object

[ ]:
model = ipcoal.Model(tree=tree, Ne=1e6)

3a) Simulate unlinked snps

[7]:
# simulate N unlinked SNPs (will run until N snps are produced)
model.sim_snps(1000)
[8]:
model.df
[8]:
locus start end nbps nsnps genealogy
0 0 0 1 1 1 (r6:3.33677e+06,((r3:737...
1 1 0 1 1 1 ((r3:1.02472e+06,r5:1.02...
2 2 0 1 1 1 (r0:4.08496e+06,((r7:2.4...
3 3 0 1 1 1 ((r2:1.05782e+06,r0:1.05...
4 4 0 1 1 1 ((r2:1.50159e+06,(r6:1.0...
... ... ... ... ... ... ...
995 995 0 1 1 1 ((r6:941804,r3:941804):1...
996 996 0 1 1 1 ((r5:2.38812e+06,r7:2.38...
997 997 0 1 1 1 ((r7:1.21434e+06,(r5:1.0...
998 998 0 1 1 1 ((r7:2.05576e+06,(r0:1.2...
999 999 0 1 1 1 ((r0:1.63448e+06,(r5:1.1...

1000 rows × 6 columns

[9]:
model.seqs
[9]:
array([[3, 1, 3, ..., 0, 3, 3],
       [2, 1, 1, ..., 0, 2, 2],
       [2, 1, 3, ..., 1, 2, 2],
       ...,
       [0, 3, 3, ..., 1, 3, 3],
       [2, 1, 1, ..., 0, 2, 3],
       [2, 1, 3, ..., 1, 3, 2]], dtype=uint8)

3b) Simulate a bunch of loci

[10]:
# simulate N loci of len L
model.sim_loci(100, 3000)
[11]:
# view the genealogies and stats in a table
model.df
[11]:
locus start end nbps nsnps genealogy
0 0 0 21 21 1 ((r7:1.56192e+06,(r3:1.1...
1 0 21 121 100 12 ((r7:1.56192e+06,(r3:1.1...
2 0 121 166 45 9 ((r7:1.56192e+06,(r3:1.1...
3 0 166 191 25 5 ((r7:1.56192e+06,(r3:1.1...
4 0 191 203 12 1 ((r7:1.56192e+06,(r3:1.1...
... ... ... ... ... ... ...
3842 99 2727 2809 82 23 ((r3:2.18634e+06,(r0:945...
3843 99 2809 2885 76 17 ((r3:2.18634e+06,(r0:945...
3844 99 2885 2927 42 12 ((r3:2.18634e+06,(r0:945...
3845 99 2927 2971 44 12 ((r3:992058,(r0:945210,r...
3846 99 2971 3000 29 7 (((r5:1.14777e+06,r7:1.1...

3847 rows × 6 columns

[12]:
# view sequence data as an array
model.seqs
[12]:
array([[[2, 1, 0, ..., 2, 2, 3],
        [2, 1, 0, ..., 2, 1, 3],
        [2, 1, 0, ..., 2, 1, 3],
        ...,
        [2, 1, 0, ..., 2, 2, 3],
        [2, 1, 0, ..., 2, 1, 3],
        [2, 1, 0, ..., 2, 1, 3]],

       [[2, 3, 3, ..., 0, 1, 2],
        [2, 3, 3, ..., 0, 1, 2],
        [2, 3, 3, ..., 0, 1, 2],
        ...,
        [2, 3, 3, ..., 0, 1, 2],
        [2, 3, 3, ..., 0, 1, 2],
        [2, 3, 3, ..., 0, 1, 2]],

       [[2, 1, 0, ..., 2, 3, 1],
        [2, 1, 0, ..., 2, 3, 1],
        [2, 1, 0, ..., 2, 0, 1],
        ...,
        [2, 1, 0, ..., 2, 0, 1],
        [2, 1, 0, ..., 2, 3, 1],
        [2, 1, 0, ..., 2, 0, 1]],

       ...,

       [[3, 0, 0, ..., 3, 2, 1],
        [3, 0, 0, ..., 3, 2, 1],
        [3, 0, 0, ..., 3, 2, 1],
        ...,
        [3, 0, 0, ..., 3, 2, 1],
        [3, 0, 0, ..., 3, 2, 1],
        [3, 0, 0, ..., 3, 2, 1]],

       [[2, 2, 2, ..., 0, 1, 1],
        [2, 2, 2, ..., 0, 1, 1],
        [2, 2, 2, ..., 0, 1, 1],
        ...,
        [2, 2, 2, ..., 0, 1, 1],
        [2, 2, 2, ..., 0, 1, 1],
        [2, 2, 2, ..., 0, 1, 1]],

       [[2, 1, 1, ..., 3, 2, 2],
        [2, 1, 1, ..., 3, 2, 2],
        [2, 1, 1, ..., 3, 2, 2],
        ...,
        [2, 1, 1, ..., 3, 2, 2],
        [2, 1, 1, ..., 3, 2, 2],
        [2, 1, 1, ..., 3, 2, 2]]], dtype=uint8)

4) Infer a gene tree for each locus

[13]:
# infer a tree for every locus
model.infer_gene_trees(inference_method='raxml')
[14]:
# inferred_tree column is now added to the dataframe!
model.df
[14]:
locus start end nbps nsnps genealogy inferred_tree
0 0 0 21 21 1 ((r7:1.56192e+06,(r3:1.1... ((r4:0.02105516863155426...
1 0 21 121 100 12 ((r7:1.56192e+06,(r3:1.1... ((r4:0.02105516863155426...
2 0 121 166 45 9 ((r7:1.56192e+06,(r3:1.1... ((r4:0.02105516863155426...
3 0 166 191 25 5 ((r7:1.56192e+06,(r3:1.1... ((r4:0.02105516863155426...
4 0 191 203 12 1 ((r7:1.56192e+06,(r3:1.1... ((r4:0.02105516863155426...
... ... ... ... ... ... ... ...
3842 99 2727 2809 82 23 ((r3:2.18634e+06,(r0:945... ((r3:0.01625070763953974...
3843 99 2809 2885 76 17 ((r3:2.18634e+06,(r0:945... ((r3:0.01625070763953974...
3844 99 2885 2927 42 12 ((r3:2.18634e+06,(r0:945... ((r3:0.01625070763953974...
3845 99 2927 2971 44 12 ((r3:992058,(r0:945210,r... ((r3:0.01625070763953974...
3846 99 2971 3000 29 7 (((r5:1.14777e+06,r7:1.1... ((r3:0.01625070763953974...

3847 rows × 7 columns