- SQL code
/*有一张表(大约200万条记录)。为方便测试,剔除无关信息,随机生成10000行记录,保留3列,记为:test(usrid,value,u_type),其中usrid唯一,value在1000范围以内,u_type为‘Yes'或‘No’。--要求:选择表中value值相同但u_type不同的记录,将其usrid成对找出来。--例如:原始表为:usrid value u_type1 1 Yes 2 34 No4 86 No5 34 No6 7 Yes8 1 Yes9 1 No3 10 Yes89 10 Yes78 7 No14 2 No66 2 Yes102 2 No708 8 Yes84 8 No99 8 Yes182 8 No则,最终表为(只有1行):Usrid 19 678 14 66 8499182708 这里像value为1的记录,u_type有2个Yes,1个No。属于多对1,那么任意挑一个Yes和No的记录,找出其usrid(1和9)。value为2的记录属于1对多,做类似处理。但是多对多的时候,要取Yes和No最小记录数,例如value为8时,有2个Yes,2个No,那么都要取出,即:对于每一个相同的value,取出的记录数是:2*min(Yes,No)。*/--随机生成数据if OBJECT_ID('test2') is not nulldrop table test2gocreate table test2(usrid int,value int,u_type varchar(5))declare @i intset @i=1while @i<=10000begin insert into test2 values(@i,ABS(CHECKSUM(newid())%1000),ABS(CHECKSUM(newid()))%2) set @i=@i+1endupdate test2set u_type=case u_type when 1 then 'Yes' when 0 then 'No' end from test2 --select count(1) from test2
------解决方案--------------------
- SQL code
;with cte as( select value,sum(case when u_type='Yes' then 1 else 0 end) Yes,sum(case when u_type='No' then 1 else 0 end) No from @test group by value having count(distinct u_type)>1)select t.usrid from( select a.usrid,a.value,a.u_type,row_number() over(partition by a.value,a.u_type order by newid()) rn from @test a,cte b where a.value=b.value and b.Yes>b.No) twhere t.rn=1union allselect t.usrid from( select a.usrid,a.value,a.u_type,row_number() over(partition by a.value,a.u_type order by a.usrid) rn from @test a,cte b where a.value=b.value and b.Yes<b.No) twhere t.rn=1union allselect t.usrid from( select a.usrid,a.value,a.u_type,row_number() over(partition by a.value,a.u_type order by a.value) rn from @test a,cte b where a.value=b.value and b.Yes=b.No) torder by usrid
------解决方案--------------------