Author Topic: [SQL] Duplicate rows with multiple LEFT JOINs  (Read 2974 times)

Hurra

  • EA User
  • **
  • Posts: 183
  • Karma: +0/-0
    • View Profile
    • Find me at LinkedIn!
[SQL] Duplicate rows with multiple LEFT JOINs
« on: February 20, 2023, 08:11:02 pm »
Hello!

I have a query where I get all requirements in a branch.

Then I left join with another group or requirements and LEFT JOIN based on connector (deriveReqt).

Then I left join again with a group of system element and LEFT JOIN based on connector (satisfy).

So the setup is the following:

Code: [Select]
(system element) -- satisfy --> (system requirement) -- deriveReqt --> (stakeholder requirement)
Initially my output was two columns, one for ID of system requirements, and one for related stakeholder requirements which was grouped so the output would be something like this example:

System requirement ID | derived Stakeholder requirement ID
1 | 3
2 | 5, 8
3 | 6, 11
4 | 1, 7

But when I added a third column with system elements which satisfy the system requirements, the stakeholder column gets duplicated with the number of related system elements, for example:

System requirement ID | derived Stakeholder requirement ID | System element
1 | 3 | SE-1
2 | 5, 8, 5, 8 | SE-1, SE-2
3 | 6, 11 | SE-3
4 | 7, 7, 7 | SE-1, SE-2, SE-3

I can clearly see the connection between duplication of 'derived Stakeholder requirement ID' and the number of related 'System element'.

But I have no idea how I fix that.

The desired output would be:

System requirement ID | derived Stakeholder requirement ID | System element
1 | 3 | SE-1
2 | 5, 8 | SE-1, SE-2
3 | 6, 11 | SE-3
4 | 7 | SE-1, SE-2, SE-3

Full SQL:
Code: [Select]
SELECT SK_TABLE.CLASSGUID, SK_TABLE.CLASSTYPE,
SK_TABLE.SK_ID AS SK_ID,
SK_TABLE.SK_KAPITEL AS SK_KAPITEL,
SK_TABLE.SK_TEXT AS SK_TEXT,
COALESCE(GROUP_CONCAT(IK_TABLE.IK_ID, '; '), '---') AS IK_ID,
COALESCE(GROUP_CONCAT(SE_TABLE.NAME, '; '), '---') AS SE_NAME

FROM
(SELECT SK.ea_guid AS CLASSGUID, SK.Object_Type AS CLASSTYPE, SK.Object_ID AS SK_OID,
SK_ID.Value AS SK_ID, SK_PACKAGE.Name AS SK_KAPITEL, SK_TEXT.Notes AS SK_TEXT
FROM t_object SK
INNER JOIN t_objectproperties SK_ID ON SK_ID.Object_ID = SK.Object_ID AND SK_ID.Property = 'id'
INNER JOIN t_objectproperties SK_TEXT ON SK_TEXT.Object_ID = SK.Object_ID AND SK_TEXT.Property = 'text'
INNER JOIN t_package SK_PACKAGE ON SK_PACKAGE.Package_ID = SK.Package_ID
WHERE SK.Package_ID IN (#Branch='{4FD8BE48-EE95-44ae-ACCB-A7CE7851EB7C}'#)
AND SK.Stereotype = 'Systemkrav')
SK_TABLE

LEFT JOIN
(SELECT IK_ID.Value AS IK_ID, DERIVE.Start_Object_ID AS 'DERIVE_END'
FROM t_object IK
INNER JOIN t_objectproperties IK_ID ON IK_ID.Object_ID = IK.Object_ID AND IK_ID.Property = 'id'
INNER JOIN t_connector DERIVE ON DERIVE.End_Object_ID = IK.Object_ID AND DERIVE.Stereotype = 'deriveReqt'
WHERE IK.Package_ID IN (#Branch='{20273ECC-AB9C-43e3-8E6D-43E7DD60F3AC}'#)
AND IK.Stereotype = 'Intressentkrav')
IK_TABLE ON SK_TABLE.SK_OID = IK_TABLE.DERIVE_END

LEFT JOIN (
SELECT SE.Name AS NAME, SATISFY.End_Object_ID AS SATISFY_END
FROM t_object SE
INNER JOIN t_connector SATISFY ON SATISFY.Start_Object_ID = SE.Object_ID AND SATISFY.Stereotype = 'satisfy'
WHERE SE.Package_ID IN (#Branch='{95E5A654-A217-4122-B011-045E9F7A720D}'#)
AND SE.Stereotype = 'OperationalPerformer')
SE_TABLE ON SE_TABLE.SATISFY_END = SK_TABLE.SK_OID

GROUP BY SK_TABLE.CLASSGUID
ORDER BY LENGTH(SK_TABLE.SK_ID), SK_TABLE.SK_ID

Any pointers of how I would fix that would be appreciated!

When googling I found something using WITH, but IIRC SQL statements in Sparx EA MUST start with SELECT so didn't even bother to try it out.

Cheers!
always learning!

Geert Bellekens

  • EA Guru
  • *****
  • Posts: 13274
  • Karma: +556/-33
  • Make EA work for YOU!
    • View Profile
    • Enterprise Architect Consultant and Value Added Reseller
Re: [SQL] Duplicate rows with multiple LEFT JOINs
« Reply #1 on: February 20, 2023, 08:44:39 pm »
Which database?

In recent SQL Server you can use the function STRING_AGGREGATE to group your requiements into a single row.

Here's an example from one of my queries:

Code: [Select]
select * from
(select distinct
(select STRING_AGG( u.Name, char(10))
from t_connector c
inner join t_object u on u.Object_ID = c.Start_Object_ID
and u.Stereotype = 'EAM_Contact'
and exists (select gd.Connector_ID from t_connector gd
where gd.Start_Object_ID = u.Object_ID
and gd.Stereotype = 'ArchiMate_Aggregation'
and gd.End_Object_ID in ("& domain.ElementID &")
)
inner join t_objectproperties tv on tv.Object_ID = u.Object_ID
and tv.Property = 'IsDisabled'
and isnull(tv.Value, 'False') <> 'True'
where c.Stereotype = 'ArchiMate_Assignment'
and c.End_Object_ID = f.FunctionID
 ) as Users
,f.FunctionName
,f.FunctionDescription
,(select STRING_AGG( dep.Name, char(10))
from t_connector c
inner join t_object dep on dep.Object_ID = c.End_Object_ID
and dep.Stereotype = 'ArchiMate_BusinessCollaboration'
where c.Stereotype = 'ArchiMate_Aggregation'
and c.Start_Object_ID = f.FunctionID
 ) as DeelVan
,f.RoleName
, f.RoleDescription
,cg.Name as BusinessRoleGroup
from
       (select cg.Object_ID, cg.Name, cg.Note
       from  t_object cg
       where cg.Stereotype = 'EAM_UserGroup'
       and cg.Name like 'BG%'
   and exists (select gd.Connector_ID from t_connector gd
where gd.Start_Object_ID = cg.Object_ID
and gd.Stereotype = 'ArchiMate_Aggregation'
and gd.End_Object_ID in ("& domain.ElementID &")
)
   ) cg
full outer join
       (select distinct cp.Start_Object_ID
   ,f.Object_ID as FunctionID
   ,f.Name as FunctionName
   ,f.Note as FunctionDescription
   ,pr.Name as RoleName
   ,pr.Note as RoleDescription
       from t_object f
       left join t_connector cf on f.Object_ID = cf.End_Object_ID
                                        and cf.Stereotype = 'trace'
       left join t_object pr on  cf.Start_Object_ID = pr.Object_ID
                                        and pr.Stereotype = 'PartnerRole'
       left join t_connector cp on  pr.Object_ID = cp.End_Object_ID
                                        and cp.Stereotype = 'EAM_UserGroupMember'
       where f.Stereotype = 'EAM_Function'
       ) f on f.Start_Object_ID = cg.Object_ID
) f
order by isnull(f.FunctionName, 'ZZZ'), isnull(f.RoleName, 'ZZZ')

Geert

Hurra

  • EA User
  • **
  • Posts: 183
  • Karma: +0/-0
    • View Profile
    • Find me at LinkedIn!
Re: [SQL] Duplicate rows with multiple LEFT JOINs
« Reply #2 on: February 20, 2023, 09:02:23 pm »
Which database?

In recent SQL Server you can use the function STRING_AGGREGATE to group your requiements into a single row.

Here's an example from one of my queries:

Code: [Select]
select * from
(select distinct
(select STRING_AGG( u.Name, char(10))
from t_connector c
inner join t_object u on u.Object_ID = c.Start_Object_ID
and u.Stereotype = 'EAM_Contact'
and exists (select gd.Connector_ID from t_connector gd
where gd.Start_Object_ID = u.Object_ID
and gd.Stereotype = 'ArchiMate_Aggregation'
and gd.End_Object_ID in ("& domain.ElementID &")
)
inner join t_objectproperties tv on tv.Object_ID = u.Object_ID
and tv.Property = 'IsDisabled'
and isnull(tv.Value, 'False') <> 'True'
where c.Stereotype = 'ArchiMate_Assignment'
and c.End_Object_ID = f.FunctionID
 ) as Users
,f.FunctionName
,f.FunctionDescription
,(select STRING_AGG( dep.Name, char(10))
from t_connector c
inner join t_object dep on dep.Object_ID = c.End_Object_ID
and dep.Stereotype = 'ArchiMate_BusinessCollaboration'
where c.Stereotype = 'ArchiMate_Aggregation'
and c.Start_Object_ID = f.FunctionID
 ) as DeelVan
,f.RoleName
, f.RoleDescription
,cg.Name as BusinessRoleGroup
from
       (select cg.Object_ID, cg.Name, cg.Note
       from  t_object cg
       where cg.Stereotype = 'EAM_UserGroup'
       and cg.Name like 'BG%'
   and exists (select gd.Connector_ID from t_connector gd
where gd.Start_Object_ID = cg.Object_ID
and gd.Stereotype = 'ArchiMate_Aggregation'
and gd.End_Object_ID in ("& domain.ElementID &")
)
   ) cg
full outer join
       (select distinct cp.Start_Object_ID
   ,f.Object_ID as FunctionID
   ,f.Name as FunctionName
   ,f.Note as FunctionDescription
   ,pr.Name as RoleName
   ,pr.Note as RoleDescription
       from t_object f
       left join t_connector cf on f.Object_ID = cf.End_Object_ID
                                        and cf.Stereotype = 'trace'
       left join t_object pr on  cf.Start_Object_ID = pr.Object_ID
                                        and pr.Stereotype = 'PartnerRole'
       left join t_connector cp on  pr.Object_ID = cp.End_Object_ID
                                        and cp.Stereotype = 'EAM_UserGroupMember'
       where f.Stereotype = 'EAM_Function'
       ) f on f.Start_Object_ID = cg.Object_ID
) f
order by isnull(f.FunctionName, 'ZZZ'), isnull(f.RoleName, 'ZZZ')

Geert

I'm sorry. This is a local QEAX-file hence SQLite.

Yes, I know how to group, this line does that (combined with COALESCE if null):
Code: [Select]
COALESCE(GROUP_CONCAT(IK_TABLE.IK_ID, '; '), '---') AS IK_ID,

The problem is that the group duplicates values when I add the LEFT JOIN to find elements with the 'satisfy'-relationship.

Without system elements I get:
23; 45; 59

as expected. But when I add the LEFT JOIN for system elements, the above group gets duplicated the number of times a system element is found.

So if the LEFT JOIN finds one system element the output is still:
23; 45; 59

If the LEFT JOIN finds two system elements the group will be:
23; 45; 59; 23; 45; 59

and so on.

I would like it to output:
23; 45; 59

regardless of how many system elements are found.

I don't really understand why this is happening because the stakeholder requirements and system elements are not directly related to each other with connectors, they always go through the system requirement.
always learning!

Hurra

  • EA User
  • **
  • Posts: 183
  • Karma: +0/-0
    • View Profile
    • Find me at LinkedIn!
Re: [SQL] Duplicate rows with multiple LEFT JOINs
« Reply #3 on: April 21, 2023, 04:52:00 pm »
As per usual Geert is right. And just to clarify:

The secret sauce is to aggregate before the join. This removes the need to aggregate in the first SELECT statement.

In my example it would be:

Code: [Select]
LEFT JOIN (
SELECT GROUP_CONCAT(AGG_TABLE.IK_ID, '; ') AS IK_ID, AGG_TABLE.DERIVE_END
FROM (
SELECT IK_ID.Value AS IK_ID, DERIVE.Start_Object_ID AS 'DERIVE_END', IK.Object_ID AS IK_OID
FROM t_object IK
INNER JOIN t_objectproperties IK_ID ON IK_ID.Object_ID = IK.Object_ID AND IK_ID.Property = 'id'
INNER JOIN t_connector DERIVE ON DERIVE.End_Object_ID = IK.Object_ID AND DERIVE.Stereotype = 'deriveReqt'
WHERE IK.Package_ID IN (#Branch='{20273ECC-AB9C-43e3-8E6D-43E7DD60F3AC}'#)
AND IK.Stereotype = 'Intressentkrav'
GROUP BY IK_ID.Value, DERIVE.Start_Object_ID, IK.Object_ID)
AS AGG_TABLE
GROUP BY AGG_TABLE.DERIVE_END)
IK_TABLE ON IK_TABLE.DERIVE_END = SK_TABLE.SK_OID


Full SQL:
Code: [Select]
SELECT SK_TABLE.CLASSGUID, SK_TABLE.CLASSTYPE,
SK_TABLE.SK_ID AS SK_ID,
SK_TABLE.SK_KAPITEL AS SK_KAPITEL,
SK_TABLE.SK_TEXT AS SK_TEXT,
COALESCE(IK_TABLE.IK_ID, '---') AS IK_ID,
COALESCE(GROUP_CONCAT(SE_TABLE.NAME, '; '), '---') AS SE_NAME

FROM
(SELECT SK.ea_guid AS CLASSGUID, SK.Object_Type AS CLASSTYPE, SK.Object_ID AS SK_OID,
SK_ID.Value AS SK_ID, SK_PACKAGE.Name AS SK_KAPITEL, SK_TEXT.Notes AS SK_TEXT
FROM t_object SK
INNER JOIN t_objectproperties SK_ID ON SK_ID.Object_ID = SK.Object_ID AND SK_ID.Property = 'id'
INNER JOIN t_objectproperties SK_TEXT ON SK_TEXT.Object_ID = SK.Object_ID AND SK_TEXT.Property = 'text'
INNER JOIN t_package SK_PACKAGE ON SK_PACKAGE.Package_ID = SK.Package_ID
WHERE SK.Package_ID IN (#Branch='{4FD8BE48-EE95-44ae-ACCB-A7CE7851EB7C}'#)
AND SK.Stereotype = 'Systemkrav')
SK_TABLE

LEFT JOIN (
SELECT GROUP_CONCAT(AGG_TABLE.IK_ID, '; ') AS IK_ID, AGG_TABLE.DERIVE_END
FROM (
SELECT IK_ID.Value AS IK_ID, DERIVE.Start_Object_ID AS 'DERIVE_END', IK.Object_ID AS IK_OID
FROM t_object IK
INNER JOIN t_objectproperties IK_ID ON IK_ID.Object_ID = IK.Object_ID AND IK_ID.Property = 'id'
INNER JOIN t_connector DERIVE ON DERIVE.End_Object_ID = IK.Object_ID AND DERIVE.Stereotype = 'deriveReqt'
WHERE IK.Package_ID IN (#Branch='{20273ECC-AB9C-43e3-8E6D-43E7DD60F3AC}'#)
AND IK.Stereotype = 'Intressentkrav'
GROUP BY IK_ID.Value, DERIVE.Start_Object_ID, IK.Object_ID)
AS AGG_TABLE
GROUP BY AGG_TABLE.DERIVE_END)
IK_TABLE ON IK_TABLE.DERIVE_END = SK_TABLE.SK_OID

LEFT JOIN (
SELECT SE.Name AS NAME, SATISFY.End_Object_ID AS SATISFY_END
FROM t_object SE
INNER JOIN t_connector SATISFY ON SATISFY.Start_Object_ID = SE.Object_ID AND SATISFY.Stereotype = 'satisfy'
WHERE SE.Package_ID IN (#Branch='{95E5A654-A217-4122-B011-045E9F7A720D}'#)
AND SE.Stereotype = 'OperationalPerformer')
SE_TABLE ON SE_TABLE.SATISFY_END = SK_TABLE.SK_OID

GROUP BY SK_TABLE.CLASSGUID
ORDER BY LENGTH(SK_TABLE.SK_ID), SK_TABLE.SK_ID

'Pseudo SQL':
Code: [Select]
LEFT JOIN (
SELECT GROUP_CONCAT(TEMP_TABLE.VALUE, 'delimeter') AS VALUE, TEMP_TABLE.JOIN_VALUE AS JOIN_VALUE
FROM (
-- SQL QUERY
)
AS TEMP_TABLE
GROUP BY TEMP_TABLE.JOIN_VALUE)
MAIN_TABLE ON MAIN_TABLE.JOIN_VALUE = OTHER_TABLE.JOIN_VALUE

Thank you Geert!
always learning!