提交 5ff552cd 作者: Jeromy Johnson 提交者: GitHub

Merge pull request #3700 from ipfs/kevina/enumerate-children-refactor

Refactor EnumerateChildren to avoid the need for the bestEffort parameter.
...@@ -370,7 +370,7 @@ func provideKeysRec(ctx context.Context, r routing.IpfsRouting, dserv dag.DAGSer ...@@ -370,7 +370,7 @@ func provideKeysRec(ctx context.Context, r routing.IpfsRouting, dserv dag.DAGSer
for _, c := range cids { for _, c := range cids {
kset := cid.NewSet() kset := cid.NewSet()
err := dag.EnumerateChildrenAsync(ctx, dserv, c, kset.Visit) err := dag.EnumerateChildrenAsync(ctx, dag.GetLinksDirect(dserv), c, kset.Visit)
if err != nil { if err != nil {
return err return err
} }
......
...@@ -400,7 +400,7 @@ func pinLsAll(typeStr string, ctx context.Context, n *core.IpfsNode) (map[string ...@@ -400,7 +400,7 @@ func pinLsAll(typeStr string, ctx context.Context, n *core.IpfsNode) (map[string
if typeStr == "indirect" || typeStr == "all" { if typeStr == "indirect" || typeStr == "all" {
set := cid.NewSet() set := cid.NewSet()
for _, k := range n.Pinning.RecursiveKeys() { for _, k := range n.Pinning.RecursiveKeys() {
err := dag.EnumerateChildren(n.Context(), n.DAG, k, set.Visit, false) err := dag.EnumerateChildren(n.Context(), n.DAG.GetLinks, k, set.Visit)
if err != nil { if err != nil {
return nil, err return nil, err
} }
......
...@@ -163,7 +163,7 @@ func TestAddGCLive(t *testing.T) { ...@@ -163,7 +163,7 @@ func TestAddGCLive(t *testing.T) {
defer cancel() defer cancel()
set := cid.NewSet() set := cid.NewSet()
err = dag.EnumerateChildren(ctx, node.DAG, last, set.Visit, false) err = dag.EnumerateChildren(ctx, node.DAG.GetLinks, last, set.Visit)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
......
...@@ -36,8 +36,10 @@ type DAGService interface { ...@@ -36,8 +36,10 @@ type DAGService interface {
} }
type LinkService interface { type LinkService interface {
// Return all links for a node, may be more effect than // GetLinks return all links for a node. The complete node does not
// calling Get in DAGService // necessarily have to exist locally, or at all. For example, raw
// leaves cannot possibly have links so there is no need to look
// at the node.
GetLinks(context.Context, *cid.Cid) ([]*node.Link, error) GetLinks(context.Context, *cid.Cid) ([]*node.Link, error)
GetOfflineLinkService() LinkService GetOfflineLinkService() LinkService
...@@ -114,6 +116,8 @@ func decodeBlock(b blocks.Block) (node.Node, error) { ...@@ -114,6 +116,8 @@ func decodeBlock(b blocks.Block) (node.Node, error) {
} }
} }
// GetLinks returns the links for the node; the node doesn't necessarily have
// to exist locally.
func (n *dagService) GetLinks(ctx context.Context, c *cid.Cid) ([]*node.Link, error) { func (n *dagService) GetLinks(ctx context.Context, c *cid.Cid) ([]*node.Link, error) {
if c.Type() == cid.Raw { if c.Type() == cid.Raw {
return nil, nil return nil, nil
...@@ -138,11 +142,24 @@ func (n *dagService) Remove(nd node.Node) error { ...@@ -138,11 +142,24 @@ func (n *dagService) Remove(nd node.Node) error {
return n.Blocks.DeleteBlock(nd) return n.Blocks.DeleteBlock(nd)
} }
// GetLinksDirect builds a GetLinks function that obtains a node's links
// from the node itself: it loads the node with serv.Get and returns the
// result of its Links method, bypassing the LinkService. Any error
// reported by serv.Get (for example when the node is not available
// locally and can not be retrieved) is passed back to the caller together
// with nil links.
func GetLinksDirect(serv DAGService) GetLinks {
	fetchLinks := func(ctx context.Context, target *cid.Cid) ([]*node.Link, error) {
		// The local is deliberately not called "node" so that it does not
		// shadow the imported node package inside this closure.
		nd, getErr := serv.Get(ctx, target)
		if getErr != nil {
			return nil, getErr
		}
		return nd.Links(), nil
	}
	return fetchLinks
}
// FetchGraph fetches all nodes that are children of the given node // FetchGraph fetches all nodes that are children of the given node
func FetchGraph(ctx context.Context, root *cid.Cid, serv DAGService) error { func FetchGraph(ctx context.Context, root *cid.Cid, serv DAGService) error {
v, _ := ctx.Value("progress").(*ProgressTracker) v, _ := ctx.Value("progress").(*ProgressTracker)
if v == nil { if v == nil {
return EnumerateChildrenAsync(ctx, serv, root, cid.NewSet().Visit) return EnumerateChildrenAsync(ctx, GetLinksDirect(serv), root, cid.NewSet().Visit)
} }
set := cid.NewSet() set := cid.NewSet()
visit := func(c *cid.Cid) bool { visit := func(c *cid.Cid) bool {
...@@ -153,7 +170,7 @@ func FetchGraph(ctx context.Context, root *cid.Cid, serv DAGService) error { ...@@ -153,7 +170,7 @@ func FetchGraph(ctx context.Context, root *cid.Cid, serv DAGService) error {
return false return false
} }
} }
return EnumerateChildrenAsync(ctx, serv, root, visit) return EnumerateChildrenAsync(ctx, GetLinksDirect(serv), root, visit)
} }
// FindLinks searches this nodes links for the given key, // FindLinks searches this nodes links for the given key,
...@@ -380,20 +397,20 @@ func (t *Batch) Commit() error { ...@@ -380,20 +397,20 @@ func (t *Batch) Commit() error {
return err return err
} }
// GetLinks is the signature of a function that, given a context and the cid
// of a node, returns that node's links or an error. It is the link-lookup
// callback taken by EnumerateChildren.
type GetLinks func(context.Context, *cid.Cid) ([]*node.Link, error)
// EnumerateChildren will walk the dag below the given root node and add all // EnumerateChildren will walk the dag below the given root node and add all
// unseen children to the passed in set. // unseen children to the passed in set.
// TODO: parallelize to avoid disk latency perf hits? // TODO: parallelize to avoid disk latency perf hits?
func EnumerateChildren(ctx context.Context, ds LinkService, root *cid.Cid, visit func(*cid.Cid) bool, bestEffort bool) error { func EnumerateChildren(ctx context.Context, getLinks GetLinks, root *cid.Cid, visit func(*cid.Cid) bool) error {
links, err := ds.GetLinks(ctx, root) links, err := getLinks(ctx, root)
if bestEffort && err == ErrNotFound { if err != nil {
return nil
} else if err != nil {
return err return err
} }
for _, lnk := range links { for _, lnk := range links {
c := lnk.Cid c := lnk.Cid
if visit(c) { if visit(c) {
err = EnumerateChildren(ctx, ds, c, visit, bestEffort) err = EnumerateChildren(ctx, getLinks, c, visit)
if err != nil { if err != nil {
return err return err
} }
...@@ -427,9 +444,9 @@ func (p *ProgressTracker) Value() int { ...@@ -427,9 +444,9 @@ func (p *ProgressTracker) Value() int {
// 'fetchNodes' will start at a time // 'fetchNodes' will start at a time
var FetchGraphConcurrency = 8 var FetchGraphConcurrency = 8
func EnumerateChildrenAsync(ctx context.Context, ds DAGService, c *cid.Cid, visit func(*cid.Cid) bool) error { func EnumerateChildrenAsync(ctx context.Context, getLinks GetLinks, c *cid.Cid, visit func(*cid.Cid) bool) error {
feed := make(chan *cid.Cid) feed := make(chan *cid.Cid)
out := make(chan node.Node) out := make(chan []*node.Link)
done := make(chan struct{}) done := make(chan struct{})
var setlk sync.Mutex var setlk sync.Mutex
...@@ -442,7 +459,7 @@ func EnumerateChildrenAsync(ctx context.Context, ds DAGService, c *cid.Cid, visi ...@@ -442,7 +459,7 @@ func EnumerateChildrenAsync(ctx context.Context, ds DAGService, c *cid.Cid, visi
for i := 0; i < FetchGraphConcurrency; i++ { for i := 0; i < FetchGraphConcurrency; i++ {
go func() { go func() {
for ic := range feed { for ic := range feed {
n, err := ds.Get(ctx, ic) links, err := getLinks(ctx, ic)
if err != nil { if err != nil {
errChan <- err errChan <- err
return return
...@@ -454,7 +471,7 @@ func EnumerateChildrenAsync(ctx context.Context, ds DAGService, c *cid.Cid, visi ...@@ -454,7 +471,7 @@ func EnumerateChildrenAsync(ctx context.Context, ds DAGService, c *cid.Cid, visi
if unseen { if unseen {
select { select {
case out <- n: case out <- links:
case <-fetchersCtx.Done(): case <-fetchersCtx.Done():
return return
} }
...@@ -489,8 +506,8 @@ func EnumerateChildrenAsync(ctx context.Context, ds DAGService, c *cid.Cid, visi ...@@ -489,8 +506,8 @@ func EnumerateChildrenAsync(ctx context.Context, ds DAGService, c *cid.Cid, visi
if inProgress == 0 && next == nil { if inProgress == 0 && next == nil {
return nil return nil
} }
case nd := <-out: case links := <-out:
for _, lnk := range nd.Links() { for _, lnk := range links {
if next == nil { if next == nil {
next = lnk.Cid next = lnk.Cid
send = feed send = feed
......
...@@ -249,7 +249,7 @@ func TestFetchGraph(t *testing.T) { ...@@ -249,7 +249,7 @@ func TestFetchGraph(t *testing.T) {
offline_ds := NewDAGService(bs) offline_ds := NewDAGService(bs)
err = EnumerateChildren(context.Background(), offline_ds, root.Cid(), func(_ *cid.Cid) bool { return true }, false) err = EnumerateChildren(context.Background(), offline_ds.GetLinks, root.Cid(), func(_ *cid.Cid) bool { return true })
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
...@@ -266,7 +266,7 @@ func TestEnumerateChildren(t *testing.T) { ...@@ -266,7 +266,7 @@ func TestEnumerateChildren(t *testing.T) {
} }
set := cid.NewSet() set := cid.NewSet()
err = EnumerateChildren(context.Background(), ds, root.Cid(), set.Visit, false) err = EnumerateChildren(context.Background(), ds.GetLinks, root.Cid(), set.Visit)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
...@@ -543,7 +543,7 @@ func TestEnumerateAsyncFailsNotFound(t *testing.T) { ...@@ -543,7 +543,7 @@ func TestEnumerateAsyncFailsNotFound(t *testing.T) {
} }
cset := cid.NewSet() cset := cid.NewSet()
err = EnumerateChildrenAsync(context.Background(), ds, pcid, cset.Visit) err = EnumerateChildrenAsync(context.Background(), GetLinksDirect(ds), pcid, cset.Visit)
if err == nil { if err == nil {
t.Fatal("this should have failed") t.Fatal("this should have failed")
} }
......
...@@ -9,6 +9,7 @@ import ( ...@@ -9,6 +9,7 @@ import (
logging "gx/ipfs/QmSpJByNKFX1sCsHBEp3R73FL4NF6FnQTEGyNAXHm2GS52/go-log" logging "gx/ipfs/QmSpJByNKFX1sCsHBEp3R73FL4NF6FnQTEGyNAXHm2GS52/go-log"
cid "gx/ipfs/QmV5gPoRsjN1Gid3LMdNZTyfCtP2DsvqEbMAmz82RmmiGk/go-cid" cid "gx/ipfs/QmV5gPoRsjN1Gid3LMdNZTyfCtP2DsvqEbMAmz82RmmiGk/go-cid"
node "gx/ipfs/QmYDscK7dmdo2GZ9aumS8s5auUUAH5mR1jvj5pYhWusfK7/go-ipld-node"
) )
var log = logging.Logger("gc") var log = logging.Logger("gc")
...@@ -68,12 +69,12 @@ func GC(ctx context.Context, bs bstore.GCBlockstore, ls dag.LinkService, pn pin. ...@@ -68,12 +69,12 @@ func GC(ctx context.Context, bs bstore.GCBlockstore, ls dag.LinkService, pn pin.
return output, nil return output, nil
} }
func Descendants(ctx context.Context, ls dag.LinkService, set *cid.Set, roots []*cid.Cid, bestEffort bool) error { func Descendants(ctx context.Context, getLinks dag.GetLinks, set *cid.Set, roots []*cid.Cid) error {
for _, c := range roots { for _, c := range roots {
set.Add(c) set.Add(c)
// EnumerateChildren recursively walks the dag and adds the keys to the given set // EnumerateChildren recursively walks the dag and adds the keys to the given set
err := dag.EnumerateChildren(ctx, ls, c, set.Visit, bestEffort) err := dag.EnumerateChildren(ctx, getLinks, c, set.Visit)
if err != nil { if err != nil {
return err return err
} }
...@@ -86,12 +87,19 @@ func ColoredSet(ctx context.Context, pn pin.Pinner, ls dag.LinkService, bestEffo ...@@ -86,12 +87,19 @@ func ColoredSet(ctx context.Context, pn pin.Pinner, ls dag.LinkService, bestEffo
// KeySet currently implemented in memory, in the future, may be bloom filter or // KeySet currently implemented in memory, in the future, may be bloom filter or
// disk backed to conserve memory. // disk backed to conserve memory.
gcs := cid.NewSet() gcs := cid.NewSet()
err := Descendants(ctx, ls, gcs, pn.RecursiveKeys(), false) err := Descendants(ctx, ls.GetLinks, gcs, pn.RecursiveKeys())
if err != nil { if err != nil {
return nil, err return nil, err
} }
err = Descendants(ctx, ls, gcs, bestEffortRoots, true) bestEffortGetLinks := func(ctx context.Context, cid *cid.Cid) ([]*node.Link, error) {
links, err := ls.GetLinks(ctx, cid)
if err == dag.ErrNotFound {
err = nil
}
return links, err
}
err = Descendants(ctx, bestEffortGetLinks, gcs, bestEffortRoots)
if err != nil { if err != nil {
return nil, err return nil, err
} }
...@@ -100,7 +108,7 @@ func ColoredSet(ctx context.Context, pn pin.Pinner, ls dag.LinkService, bestEffo ...@@ -100,7 +108,7 @@ func ColoredSet(ctx context.Context, pn pin.Pinner, ls dag.LinkService, bestEffo
gcs.Add(k) gcs.Add(k)
} }
err = Descendants(ctx, ls, gcs, pn.InternalPins(), false) err = Descendants(ctx, ls.GetLinks, gcs, pn.InternalPins())
if err != nil { if err != nil {
return nil, err return nil, err
} }
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论