
    Ӕ>j-                        d Z ddlZddlZddlZddlZddlZddlZddlmZ ej                  j                  e      ZddZe G d d             ZddZd Zd	 Zd
 Zd ZddZd Zd Zd Zd ZddZddZd Zd Zd Zedk(  r e        yy)a  
Minimal reproduction of the core claim in:
Foret et al. (2021), "Sharpness-Aware Minimization for Efficiently Improving Generalization".

This is intentionally dependency-free: pure Python stdlib only.
It compares standard mini-batch SGD vs SAM on a noisy two-moons binary classification task.
The goal is not to reproduce the paper's CIFAR/ImageNet tables, but to reproduce the central
phenomenon: optimizing for neighborhoods of low loss can improve held-out generalization.
    N)	dataclassc           
         t        j                  |      }g g }}t        |       D ]  }|dz  }|j                         t        j                  z  }|dk(  r+t        j
                  |      t        j                  |      }
}	n0dt        j
                  |      z
  dt        j                  |      z
  }
}	|	|j                  d|      z  }	|
|j                  d|      z  }
|j                  |	|
g       |j                  |        t        t        |             }|j                  |       |D cg c]  }||   	 }}|D cg c]  }||   	 }}t        | dz        }t        d      D cg c]  t        fd|d | D              |z   c}g }t        d      D ]D  t        fd|d | D              |z  }|j                  t        j                  |      dz          F |D cg c]*  }t        d      D cg c]  }||   |   z
  ||   z   c}, }}}|d | |d | ||d  ||d  fS c c}w c c}w c c}w c c}w c c}}w )	N   r         ?      ?g?c              3   (   K   | ]	  }|     y wN ).0rowjs     D/root/.openclaw/workspace/reproductions/sam_minimal/reproduce_sam.py	<genexpr>zmake_moons.<locals>.<genexpr>&   s     -CQ-s   c              3   :   K   | ]  }|      z
  d z    yw)r   Nr
   )r   r   r   meanss     r   r   zmake_moons.<locals>.<genexpr>)   s#     @s3q6E!H$*@s   -q=)randomRandomrangemathpicossingaussappendlistshuffleintsumsqrt)nnoiseseedrngXyilabeltx1x2idxsplitr   stdsvarr   r   s                `   @r   
make_moonsr0      s   
--
CrqA1X 
AJJL477"A:XXa[$((1+B488A;&dhhqk(9B
cii5!!
cii5!!	"b	
 uQx.CKK!1A!1ADME?DQxH!S-1Ve9--5HED1X ,@ai@@5HDIIcNU*+, GHHsq	:A3q6E!HQ
'	:HAHVe9ai56AefI55 	 I
 
;Hs*   )H&;H+%!H0"H:4H5H:5H:c                   6    e Zd ZU eed<   eed<   eed<   eed<   y)ParamsW1b1W2b2N)__name__
__module____qualname__r   __annotations__floatr
       r   r2   r2   0   s    HHHIr<   r2   c           
      n   t        j                  |       }t        d      D cg c]+  }t        |      D cg c]  }|j                  dd       c}- }}t        |      D cg c]  }d }}t        |      D cg c]  }|j                  dd       }}t	        |||d      S c c}w c c}w c c}w c c}w )Nr   r   gffffff?        )r   r   r   r   r2   )r#   hiddenr$   _r3   r4   r5   s          r   init_paramsrA   7   s    
--
C?DQx	H!uV}
5!399Q
5	HB	HV}	%!#	%B	%&+Fm	4#))At
	4B	4"b"c"" 6	H	%	4s"   B(B#
B("	B-:B2#B(c           
          t        | j                  D cg c]  }|D cg c]  }d c} c}}| j                  D cg c]  }d c}| j                  D cg c]  }d c}d      S c c}w c c}}w c c}w c c}w )Nr>   )r2   r3   r4   r5   )pr   r@   s      r   
zeros_likerD   >   s\    6#%AC%6add8K8K[\[_[_M`VWcM`beff%68KM`s    	A,
	A'A,
	A2
	A7'A,
c                     t        | j                  D cg c]  }|d d  	 c}| j                  d d  | j                  d d  | j                        S c c}w r	   )r2   r3   r4   r5   r6   )rC   r   s     r   copy_paramsrF   A   s=    QTT*c3q6*ADDGQTT!WaddCC*s   Ac                     | dk\  rt        j                  |        }dd|z   z  S t        j                  |       }|d|z   z  S )Nr   r   )r   exp)zezs     r   sigmoidrK   D   sB    AvXXqb\cBh	!Br?r<   c                     g t        t         j                              D ]g  }|d    j                  d   |   z  |d    j                  d   |   z  z    j                  |   z   }j	                  t        j                  |             i t         fdt        t                    D               j                  z   }t        |      fS )Nr      c              3   H   K   | ]  }|   j                   |   z    y wr	   )r5   )r   khrC   s     r   r   zforward_one.<locals>.<genexpr>P   s!     61!qttAw6s   ")
r   lenr4   r3   r   r   tanhr   r6   rK   )rC   xrO   rI   logitrP   s   `    @r   forward_onerU   K   s    
A3qtt9 aD14471:!qttAwqz 11ADDG;	1 6c!f66=Egenr<   c                 V   t        |       }d}t        |      }t        | j                        }|D ]<  }	||	   ||	   }}
t        | |
      \  }}t	        t        |d      d      }||t        j                  |      z  d|z
  t        j                  d|z
        z  z    z  }||z
  }t        |      D ]  }|j                  |xx   |||   z  z  cc<   ! |xj                  |z  c_
        t        |      D ]z  }|| j                  |   z  d||   ||   z  z
  z  }|j                  d   |xx   ||
d   z  z  cc<   |j                  d   |xx   ||
d   z  z  cc<   |j                  |xx   |z  cc<   | ? d|z  }||z  }t        |      D ]	  }|d|z  | j                  d   |   dz  | j                  d   |   dz  z   | j                  |   dz  z   z  z  }|j                  d   |   |z  || j                  d   |   z  z   |j                  d   |<   |j                  d   |   |z  || j                  d   |   z  z   |j                  d   |<   |j                  |   |z  || j                  |   z  z   |j                  |<   |j                  |xx   |z  cc<    |xj                  |z  c_
        ||fS )	Nr>   :0yE>G?rM   r   r   r   r   )rD   rQ   r4   rU   minmaxr   logr   r5   r6   r3   )rC   r%   r&   indiceswdglossr!   Hr,   rS   targetrP   probdlogitrO   dzinvs                     r   loss_and_gradrf   S   s   1ADGAADD	A cFAcF6a#43tT?H-&488D>)QZ488AH;M,MMNNq 	%ADDGv!}$G	%	q 	A!$$q'!Q1!_5BDDGAJ"qt)#JDDGAJ"qt)#JDDGrMG		 'CCKD1X bADDGAJ!Oadd1gajAo=Q1LMMTT!WQZ#%QTT!WQZ7Q
TT!WQZ#%QTT!WQZ7Q
$$q'C-"qttAw,.Q	Q3 DDCKD7Nr<   c                    | j                   | j                   z  }| j                  D ]  }|D ]
  }|||z  z  }  | j                  D ]
  }|||z  z  } | j                  D ]
  }|||z  z  } t	        j
                  |      dz   S )Nr   )r6   r3   r4   r5   r   r    )r^   sr   vs       r   	grad_normrj   q   s    	qttAtt ! Aa1q5ja !TT1A:1TT1A:199Q<%r<   c                    t        d      D ]R  }t        t        | j                              D ]/  }| j                  |   |xx   ||j                  |   |   z  z  cc<   1 T t        t        | j                              D ]P  }| j                  |xx   ||j                  |   z  z  cc<   | j                  |xx   ||j                  |   z  z  cc<   R | xj
                  ||j
                  z  z  c_        y )Nr   )r   rQ   r4   r3   r5   r6   )rC   r^   scaler'   rO   s        r   
add_scaledrm   y   s    1X -s144y! 	-ADDGAJ%!$$q'!*,,J	-- 3qtt9 #	Q51447?"	Q51447?"# DDEADDLDr<   c                      t        | ||        y r	   )rm   )rC   r^   lrs      r   sgd_steprp      s    q!bSr<   c                 d   d}d}t        ||      D ]  \  }}t        | |      \  }}t        t        |d      d      }||t	        j
                  |      z  d|z
  t	        j
                  d|z
        z  z    z  }|t        |dk\  t        |      k(        z  } |t        |      z  |t        |      z  fS )Nr>   r   rW   rX   rM   r   )	ziprU   rY   rZ   r   r[   r   boolrQ   )	rC   r%   r&   r_   correctrS   ra   r@   rb   s	            r   evaluateru      s    DGAY 6	6a#43tT?H-&488D>)QZ488AH;M,MMNN3V455	6
 #a&='CF***r<   c                    t        j                  |      }t        | ||      \  }}d}	t        | j                        }
t        |      D ]  }t        |       }|j                  dd      |_        t        d      D ]4  }t        |
      D ]$  }|j                  dd      |j                  |   |<   & 6 t        |
      D ]@  }|j                  dd      |j                  |<   |j                  dd      |j                  |<   B t        |      }t        |       }t        ||||z         t        |||      \  }}t        |	||z
        }	 |	S )Nr>   r   rM   r   )r   r   ru   rQ   r4   r   rD   r   r6   r3   r5   rj   rF   rm   rZ   )rC   r%   r&   r#   radiustrialsr$   	base_lossr@   max_increaser`   dr'   rO   normq	pert_losss                    r   sharpness_proxyr      s6   
--
CAq!$LIqLADD	A6] @qMyyAq 	-A1X - YYq!_Q
-	- q 	&Aii1oADDGii1oADDG	& |N1a$'1a(	1<Y)>?@ r<   c           	      ^   t        d|z         \  }}}}	t        d|z         }
t        j                  d|z         }g }t	        |      }t        d|dz         D ]  }t        t        |            }|j                  |       t        d||      D ]  }||||z    }| dk(  rt        |
|||      \  }}t        |
||       /| dk(  r\t        |
|||      \  }}t        |      }t        |
|||z         t        |
|||      \  }}t        |
|| |z         t        |
||       t        |        |d	z  dk(  s|dk(  st        |
||      \  }}t        |
||	      \  }}|j                  |||||d
        t        |
||      \  }}t        |
||	      \  }}t        |
||	d|z         }| |||||||dS )Ni'  )r#   i  i N  rM   r   sgdsam   )epoch
train_loss	train_acc	test_losstest_acci0u  )methodr#   r   r   r   r   r   history)r0   rA   r   r   rQ   r   r   r   rf   rp   rj   rm   
ValueErrorru   r   r   )r   r#   epochs
batch_sizero   rhoXtrytrXteyterC   r$   histr!   epr\   startbatchr@   r^   g1r|   g2tr_losstr_accte_losste_accsharps                               r   trainr      s   #$7Cc3&A
--
&CDCAAvz" }uQx.G1a, 	)EE%
"23E$QS%81Ar"5%ac592 }1b#*-%ac5921b3$+.B# ((	) 7a<27&q#s3OGV&q#s3OGVKK"G&_ftz{|)}* q#s+OGVq#s+OGVAsCftm<E$F&  r<   c                 x    t        j                  |       t        |       dkD  rt        j                  |       fS dfS )NrM   r>   )
statisticsmeanrQ   stdev)valss    r   mean_sdr      s4    ??4 SY]:#3#3D#9TTPSTTr<   c                    d\  }}d}ddg}ddd}|D ci c]  }|| |   d    }}|D ci c]  }|| |   d	    }}t        |j                               d
z  }	d| d| d| d| d	dddg}
d\  }}}}|
d| dgz  }
t        |      D ]  \  }}d}|dz   |dz  z   }||   dz
  dz  |z  }||z   |z
  }|
j                  d| d|dd| d|dd||    d       |
j                  d||d z  z    d|d!z
  dd"||   d#z  d$d%       |
j                  d||d z  z    d||z   d&z    d'|j	                          d(        |
j                  d)| d*||z    d+||z    d,||z    d-	       |
j                  d| d||z   d.z    d/       d0}|
d| d1gz  }
t        |      D ]  \  }}d}|dz   |dz  z   }|	r||   |	z  |z  nd2}||z   |z
  }|
j                  d| d|dd| d|dd||    d       |
j                  d||d z  z    d|d!z
  dd"||   d3d(       |
j                  d||d z  z    d||z   d&z    d'|j	                          d(        |
j                  d)| d*||z    d+||z    d,||z    d-	       |
j                  d4       t
        j                  j                  t        d5      }t        |d6d78      5 }|j                  d9j                  |
             d d d        |S c c}w c c}w # 1 sw Y   |S xY w):N)i  i  F   r   r   z#7c8aa5z#f97316r   r   test_acc_meansharpness_proxy_meang      ?z/<svg xmlns="http://www.w3.org/2000/svg" width="z
" height="z" viewBox="0 0  z">z1<rect width="100%" height="100%" fill="#0b1020"/>z<text x="40" y="42" fill="#f8fafc" font-family="Inter,Arial" font-size="22" font-weight="700">SAM minimal reproduction: generalization and sharpness</text>z<text x="40" y="70" fill="#94a3b8" font-family="Inter,Arial" font-size="13">Noisy two-moons MLP; mean over 8 random seeds. Higher test accuracy is better; lower sharpness proxy is better.</text>)r   x        z	<text x="zP" y="105" fill="#e2e8f0" font-family="Arial" font-size="16">Test accuracy</text>7   i   g      ?g      ?z	<rect x="z" y="z.1fz	" width="z" rx="8" fill="z"/>r      zI" fill="#f8fafc" text-anchor="middle" font-family="Arial" font-size="14">d   z.2fz%</text>   zI" fill="#cbd5e1" text-anchor="middle" font-family="Arial" font-size="14">z</text>z
<line x1="z" y1="z" x2="z" y2="z" stroke="#334155"/>-   zM" fill="#64748b" font-family="Arial" font-size="12">axis starts at 75%</text>i  zR" y="105" fill="#e2e8f0" font-family="Arial" font-size="16">Sharpness proxy</text>r   z.4fz</svg>zsam_minimal_results.svgwutf-8encoding
)rZ   values	enumerater   upperospathjoinOUT_DIRopenwrite)summaryr   rP   marginmethodscolorsmaccr   	max_sharplinesx0y0phpwr'   bwrS   bhr&   r   fs                         r   	write_svgr      s   DAqFenGy1F3:
;a1gaj))
;C
;<CDqQ
122DEDELLN#d*I>qcA3o^_]``abcaddfg@ k RSE
 'NBB		"mnooE'" U1Ga#g!ftmt#b(GbLy53yJr#ho^def^g]hhklmy2a4ac#Y6  AD  EF  AG  HK  AK  LO  @P  PX  Y  	Zy2a4beBhZ  8A  BC  BI  BI  BK  AL  LS  T  	UU 
LL:bT2wfRUG6"R%H\]^	LL9RDbeBhZ/|}~	B		"opqqE'" U1Ga#g*3U1X	!B&GbLy53yJr#ho^def^g]hhklmy2a4ac#Y6  AF  GH  AI  JM  @N  NU  V  	Wy2a4beBhZ  8A  BC  BI  BI  BK  AL  LS  T  	UU 
LL:bT2wfRUG6"R%H\]^	LL77<<!:;D	dC'	* "a			% !"KI <DB"Ks   KK 0!K%%K/c            
         t        t        d            } g }g }| D ]  }dD ]z  }t        d| d| d       t        ||      }|j	                  |j                         D ci c]  \  }}|dk7  s|| c}}       |d   D ]  }|j	                  ||d|        |  i }	dD ][  }|D cg c]  }|d	   |k(  s| }
}i |	|<   d
D ]7  }t        |
D cg c]  }||   	 c}      \  }}||	|   |dz   <   ||	|   |dz   <   9 ] |	d   d   |	d   d   z
  |	d<   |	d   d   |	d   d   z
  |	d<   t        t        j                  j                  t        d      ddd      5 }t        j                  |t        |d   j                                     }|j                          |j!                  |       d d d        t        t        j                  j                  t        d      ddd      5 }t        j                  |t        |d   j                                     }|j                          |j!                  |       d d d        t        t        j                  j                  t        d      dd      5 }t#        j$                  |	|d       d d d        t'        |	      }t        t#        j(                  |	d             t        d|       y c c}}w c c}w c c}w # 1 sw Y   /xY w# 1 sw Y   xY w# 1 sw Y   jxY w) Nr   r   ztrain z seed=T)flushr   )r   r#   r   )r   r   r   r   _mean_sdr   r   r   delta_test_acc_sam_minus_sgdr   delta_sharpness_sam_minus_sgdzresults.csvr    r   )newliner   r   )
fieldnameszhistory.csvzsummary.jsonr   r   )indentwrote)r   r   printr   r   itemsr   r   r   r   r   r   csv
DictWriterkeyswriteheader	writerowsjsondumpr   dumps)seedsrows	historiesr#   r   rrO   ri   rP   r   mskeymusdr   wrsvgs                    r   mainr      s   qNEDI H$ 	HFF6(&/t<fd#AKK!'')F$!QqI~AFGy\ H  FD!FA!FGH		HH G  .7A8!6a77L 	.Cb1af12FB-/GFOC'M*+-GFOC%K(	.. /6en_.MPWX]P^_nPo.oG*+/6u~>T/UX_`eXfg}X~/~G+,	bggll7M2Cg	V -Z[^^A$tAw||~*>?
",,t,- 
bggll7M2Cg	V 2Z[^^A$y|/@/@/B*CD
",,y12 
bggll7N3S7	K (q		'1Q'(
G
C	$**WQ
'(	'31 G
 8 2- -2 2( (sD    K
.K
*K8KKAK&AK'1K3K$'K03K<__main__)i  g)\(?r   )   )g-C6?)r   g?   )r   @   g)\(?gQ?)__doc__r   r   r   r   r   r   dataclassesr   r   dirname__file__r   r0   r2   rA   rD   rF   rK   rU   rf   rj   rm   rp   ru   r   r   r   r   r   r7   r
   r<   r   <module>r      s    / . . !
''//(
#6<   #gD< +0$LU*X D zF r<   