Date
1 - 2 of 2
Union with Count returning unexpected results
Vinayak Bali
Hi All,
The objective is to count the number of nodes and edges.
Query:
g2.inject(1).union(V().has('title', 'A').aggregate('v1').union(outE().has('title', 'E1').aggregate('e').inV().has('title', 'B'),outE().has('title', 'E2').aggregate('e').inV().has('title', 'C')).aggregate('v2')).select('v1').dedup().as('sourceCount').select('e').dedup().as('edgeCount').select('v2').dedup().as('destinationCount').select('sourceCount','edgeCount','destinationCount').by(unfold().count())
[
{
"sourceCount": 1203,
"edgeCount": 9922,
"destinationCount": 9926
}
]
{
"sourceCount": 1203,
"edgeCount": 9922,
"destinationCount": 9926
}
]
But when the aggregate() steps are moved inside each branch of the inner union, in order to count each type of node and edge separately, the results are different.
Query:
g2.inject(1).union(V().has('title', 'A').aggregate('A').union(outE().has('title', 'E1').aggregate('E1').inV().has('title', 'B').aggregate('B'),outE().has('title', 'E2').aggregate('E2').inV().has('title', 'C').aggregate('C'))).select('A').dedup().as('ACount').select('E1').dedup().as('E1Count').select('B').dedup().as('BCount').select('E2').dedup().as('E2Count').select('C').dedup().as('CCount').select('ACount','E1Count','BCount','E2Count','CCount').by(unfold().count())
[
{
"vendorCount": 1203,
"supply1Count": 4,
"productCount": 4,
"supplyCount": 0,
"materialCount": 0
}
]
{
"vendorCount": 1203,
"supply1Count": 4,
"productCount": 4,
"supplyCount": 0,
"materialCount": 0
}
]
The node and edge counts no longer match after this small change. Could you please take a look and share your thoughts?
Thanks & Regards,
Vinayak
hadoopmarc@...
Hi Vinayak,
I guess this has to do with differences in lazy vs. eager evaluation between the two queries. The TinkerPop reference docs read the aggregated values with cap() on the side-effect keys, e.g. cap('A','E1','B','E2','C'), rather than with select(), to force eager evaluation; see: https://tinkerpop.apache.org/docs/current/reference/#store-step
Best wishes, Marc
For other readers, please find the queries from the original post in a more readable format:
g2.inject(1).union(
V().has('title', 'A').aggregate('v1').union(
outE().has('title', 'E1').aggregate('e').inV().has('title', 'B'),
outE().has('title', 'E2').aggregate('e').inV().has('title','C')
).aggregate('v2')
).
select('v1').dedup().as('sourceCount').
select('e').dedup().as('edgeCount').
select('v2').dedup().as('destinationCount').
select('sourceCount','edgeCount','destinationCount').by(unfold().count())
g2.inject(1).union(
V().has('title', 'A').aggregate('A').union(
outE().has('title', 'E1').aggregate('E1').inV().has('title', 'B').aggregate('B'),
outE().has('title', 'E2').aggregate('E2').inV().has('title','C').aggregate('C')
)
).
select('A').dedup().as('ACount').
select('E1').dedup().as('E1Count').
select('B').dedup().as('BCount').
select('E2').dedup().as('E2Count').
select('C').dedup().as('CCount').
select('ACount','E1Count','BCount','E2Count','CCount').by(unfold().count())
I guess this has to do with differences in lazy vs. eager evaluation between the two queries. The TinkerPop reference docs read the aggregated values with cap() on the side-effect keys, e.g. cap('A','E1','B','E2','C'), rather than with select(), to force eager evaluation; see: https://tinkerpop.apache.org/docs/current/reference/#store-step
Best wishes, Marc
For other readers, please find the queries from the original post in a more readable format:
g2.inject(1).union(
V().has('title', 'A').aggregate('v1').union(
outE().has('title', 'E1').aggregate('e').inV().has('title', 'B'),
outE().has('title', 'E2').aggregate('e').inV().has('title','C')
).aggregate('v2')
).
select('v1').dedup().as('sourceCount').
select('e').dedup().as('edgeCount').
select('v2').dedup().as('destinationCount').
select('sourceCount','edgeCount','destinationCount').by(unfold().count())
g2.inject(1).union(
V().has('title', 'A').aggregate('A').union(
outE().has('title', 'E1').aggregate('E1').inV().has('title', 'B').aggregate('B'),
outE().has('title', 'E2').aggregate('E2').inV().has('title','C').aggregate('C')
)
).
select('A').dedup().as('ACount').
select('E1').dedup().as('E1Count').
select('B').dedup().as('BCount').
select('E2').dedup().as('E2Count').
select('C').dedup().as('CCount').
select('ACount','E1Count','BCount','E2Count','CCount').by(unfold().count())