cs3223: finished
This commit is contained in:
Binary file not shown.
@@ -166,6 +166,10 @@
|
|||||||
$pi_L (R)$, [project column by list $L$ from $R$],
|
$pi_L (R)$, [project column by list $L$ from $R$],
|
||||||
$pi_L^* (R)$, [project with duplicates],
|
$pi_L^* (R)$, [project with duplicates],
|
||||||
|
|
||||||
|
$b_d$, [Data records that can fit on page],
|
||||||
|
$b_i$, [Data entries that can fit on page],
|
||||||
|
$b_r$, [RIDs that can fit on page],
|
||||||
|
|
||||||
|
|
||||||
)
|
)
|
||||||
== External Merge Sort
|
== External Merge Sort
|
||||||
@@ -233,9 +237,12 @@
|
|||||||
- *Conjunct*: $>= 1$ terms connected by $or$
|
- *Conjunct*: $>= 1$ terms connected by $or$
|
||||||
- *CNF predicate*: $>= 1$ conjuncts connected by $and$
|
- *CNF predicate*: $>= 1$ conjuncts connected by $and$
|
||||||
- *Covered Conjunct* - predicate $p_i$ is covered conjunct if each attribute in $p_i$ is in key $K$ or include column of Index $I$
|
- *Covered Conjunct* - predicate $p_i$ is covered conjunct if each attribute in $p_i$ is in key $K$ or include column of Index $I$
|
||||||
- $sigma_p (R), p = ("age" > 5) and ("height" = 180) and ("level" = 3), I_1 "key" = ("level", "weight", "height"$
|
- $p = ("age" > 5) and ("height" = 180) and ("level" = 3)$
|
||||||
- $p_c =
|
- $I_1 "key" = ("level", "weight", "height")$
|
||||||
- *Primary Conjunct* -
|
- $p_c "wrt" I_1 = ("height" = 180) and ("level" = 3)$
|
||||||
|
- *Primary Conjunct*
|
||||||
|
- $I$ matches $p$ if attributes in $p$ form prefix of $K$ and all comparison operators are equality except last
|
||||||
|
- $p_p$ is largest subset of conjuncts in $p$ such that $I$ matches $p_p$
|
||||||
- $sigma_p (R)$: Select rows from $R$ that satisfy predicate $p$
|
- $sigma_p (R)$: Select rows from $R$ that satisfy predicate $p$
|
||||||
- Access Path: way of accessing data records / entries
|
- Access Path: way of accessing data records / entries
|
||||||
- *Table Scan*: Scan all data pages (Cost: $|R|$)
|
- *Table Scan*: Scan all data pages (Cost: $|R|$)
|
||||||
@@ -248,3 +255,44 @@
|
|||||||
== B+ Trees
|
== B+ Trees
|
||||||
- For Index Scan + RID Lookup, many matching RIDs could refer to same page
|
- For Index Scan + RID Lookup, many matching RIDs could refer to same page
|
||||||
- Sort matching RIDs before performing lookup: Avoid retrieving same page
|
- Sort matching RIDs before performing lookup: Avoid retrieving same page
|
||||||
|
|
||||||
|
=== Analysis
|
||||||
|
#{
|
||||||
|
let nin = [$N_"internal"$]
|
||||||
|
let nle = [$N_"leaf"$]
|
||||||
|
let nlo = [$N_"lookup"$]
|
||||||
|
let nso = [$N_"sort"$]
|
||||||
|
let nco = [$N_"combine"$]
|
||||||
|
[
|
||||||
|
Cost of index scan = $nin + #nle + nlo$
|
||||||
|
- *#nin*: No of internal nodes accessed
|
||||||
|
- Height of B+ tree index
|
||||||
|
- $ "height(est)" = cases(
|
||||||
|
ceil(log_F (ceil( (||R||) / b_d))) &"if index is clustered",
|
||||||
|
ceil(log_F (ceil( (||R||) / b_i))) &"otherwise",
|
||||||
|
) $
|
||||||
|
- *#nlo*: Data pages accessed for RID lookups
|
||||||
|
- If $I$ is covering index for $sigma_p (R), nlo = 0$
|
||||||
|
- else $nlo = ||sigma_p_c (R)||$
|
||||||
|
- If matching RIDs are sorted before RID lookup
|
||||||
|
- $nlo = nso + min{||sigma_p_c (R)||, |R|}$
|
||||||
|
- *#nso*: sorting matching RIDs
|
||||||
|
- $nso = 0 "if" ceil( (||sigma_p_c (R)||) / b_r ) <= B$ (if RIDs can fit into $B$)
|
||||||
|
- $ nso = 2 ceil((||sigma_p_c (R)||) / b_r) ceil(log_(B-1) (N_0)), N_0 = ceil(ceil((||sigma_p_c (R)||) / b_r) / B) $
|
||||||
|
- Sorting with External Merge Sort
|
||||||
|
- #nso doesn't include read IO for pass 0 as it's included in #nin and #nle
|
||||||
|
- #nso doesn't include write IO for final merging pass as RID is used for lookup
|
||||||
|
- *#nle*: Leaf pages scanned for evaluating $sigma_p (R)$
|
||||||
|
- $nle = ceil((||sigma_p_p (R)||)/b_d)$ if clustered
|
||||||
|
- $nle = ceil((||sigma_p_p (R)||)/b_i)$ if unclustered
|
||||||
|
- *Index Combination*
|
||||||
|
- Cost = $nin^p + nle^p + nin^q + nle^q + nco + nlo$
|
||||||
|
- #nco: IO cost to compute join of $pi_p$ and $pi_q$
|
||||||
|
- If $min{|pi_X_p (S_p)|, |pi_X_q (S_q)|} <= B$
|
||||||
|
- One of the join operands can fit in mem, then $nco = 0$
|
||||||
|
== Hash based Index Scan
|
||||||
|
- Cost: $N_"dir" + N_"bucket" + nlo$
|
||||||
|
- $N_"dir"$: no of directory pages accessed (1 if extendible hash, 0 otherwise)
|
||||||
|
- $N_"bucket"$: max no of index's primary/overflow pages accessed
|
||||||
|
- $nlo= nso + min{||sigma_p_c (R)||, |R|}$ if $I$ is not covering index for $sigma_p (R)$
|
||||||
|
]}
|
||||||
|
|||||||
Reference in New Issue
Block a user