Ñò
ÝT~Kc           @   s_  d  Z  d d k Z d d k Z d d k Z d d k l Z d d k l Z l Z d d k	 l
 Z
 d d k l Z d d k Z d d k Z y d d k l Z Wn d d k l Z n Xe i d	 ƒ Z d
 „  Z d „  Z d „  Z e i d e i ƒ Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z  d „  Z! d „  Z" d d „ Z$ d S(   sÌ  
Reconstitute an entry document from the output of the Universal Feed Parser.

The main entry point is called 'reconstitute'.  Input parameters are:

  results: this is the entire hash table return by the UFP
  entry:   this is the entry in the hash that you want reconstituted

The value returned is an XML DOM.  Every effort is made to convert
everything to unicode, and text fields into either plain text or
well formed XHTML.

Todo:
  * extension elements
iÿÿÿÿN(   t   escape(   t   minidomt   Node(   t   html5parser(   t   dom(   t   md5(   t   news
   [--]c         C   s‹   | p d St  | t ƒ o0 y | i d ƒ } WqL | i d ƒ } qL Xn |  i } | i | ƒ } | i | i | ƒ ƒ |  i | ƒ | S(   sC    utility function to create a child element with the specified textNs   utf-8s
   iso-8859-1(   t
   isinstancet   strt   decodet   ownerDocumentt   createElementt   appendChildt   createTextNode(   t   parentt   namet   valuet   xdoct   xelement(    (    s%   /net/bzr/venus/planet/reconstitute.pyt   createTextElement   s     	c         C   s)   d d t  t |  i d ƒ ƒ ƒ d d S(   s    replace invalid characters s#   <acronym title="U+%s">ï¿½</acronym>t   000i    i   iüÿÿÿ(   t   hext   ordt   group(   t   c(    (    s%   /net/bzr/venus/planet/reconstitute.pyt
   invalidate,   s    c         C   sR   |  i  d ƒ }  |  i d ƒ o t t |  d d ƒ ƒ }  n t t |  ƒ ƒ }  |  S(   s4    convert numeric character references to characters i   t   xi   (   R   t
   startswitht   unichrt   int(   R   (    (    s%   /net/bzr/venus/planet/reconstitute.pyt   ncr2c1   s
    s   \W+c         C   s]   y. t  i d |  i d ƒ ƒ i ƒ  i d ƒ }  Wn t  i d |  ƒ i ƒ  }  n X|  i d ƒ S(   s    generate a css id from a name t   -s   utf-8(   t   nonalphat   subR	   t   lowert   encodet   strip(   R   (    (    s%   /net/bzr/venus/planet/reconstitute.pyt   cssid;   s
    .c         C   s?  | i  d ƒ o | i o | i } nú | i  d ƒ o | i o | i } nÓ | i  d ƒ o1 | i o' | i i d t | i ƒ i ƒ  } n’ | i  d ƒ o1 | i o' | i	 i d t | i ƒ i ƒ  } nQ | i  d ƒ o< | i
 o2 | i
 d i d t | i
 d i ƒ i ƒ  } n d S|  o t |  d | ƒ n | S(	   s%    copy or compute an id for the entry t   idt   linkt   titlet   /t   summaryt   contenti    N(   t   has_keyR&   R'   R(   t   title_detailt   baseR   t	   hexdigestR*   t   summary_detailR+   R   R   (   t   xentryt   entryt   entry_id(    (    s%   /net/bzr/venus/planet/reconstitute.pyR&   C   s"    ! c         C   sO  | i  d ƒ pD g  | d <| i  d ƒ o& | d i h d d 6| i d 6ƒ qT n |  i } xë | d D]ß } d | i ƒ  j o qh n | i d ƒ } | i d | i d ƒ ƒ | i  d ƒ o | i d | i d ƒ ƒ n | i  d ƒ o  | i d | i d d ƒ ƒ n | i  d ƒ o | i d | i d ƒ ƒ n |  i	 | ƒ qh Wd S(	   s    copy links to the entry t   linksR'   t	   alternatet   relt   hreft   typet   lengthN(
   R,   t   appendR'   R
   t   keysR   t   setAttributet   gett   NoneR   (   R1   R2   R   R'   t   xlink(    (    s%   /net/bzr/venus/planet/reconstitute.pyR4   Z   s$    
*	   c         C   se   | p d St  i d | ƒ } t |  | | ƒ } t  i t i ƒ  | ƒ } | i d | i d ƒ ƒ d S(   s/    insert a date-formated element into the entry Ns   %Y-%m-%dT%H:%M:%SZs   planet:formats   utf-8(   t   timet   strftimeR   t   configt   date_formatR<   R	   (   R1   R   t   parsedt	   formattedt   xdate(    (    s%   /net/bzr/venus/planet/reconstitute.pyt   datem   s     c         C   sË   |  i  i d ƒ } | i d ƒ p | i o d  S| i d | i d ƒ ƒ | i d ƒ o' | i o | i d | i d ƒ ƒ n | i d ƒ o' | i o | i d | i d ƒ ƒ n |  i | ƒ d  S(   Nt   categoryt   termt   schemet   label(	   R
   R   R,   RI   R<   R=   RJ   RK   R   (   R1   t   tagt   xtag(    (    s%   /net/bzr/venus/planet/reconstitute.pyRH   u   s     c         C   s³   | p d S|  i  } | i | ƒ } | i d d ƒ o t | d | i d ƒ ƒ n | i | i d ƒ ƒ t | d | i d d ƒ ƒ t | d | i d d ƒ ƒ |  i | ƒ d S(   s.    insert an author-like element into the entry NR   t   emailt   uriR7   (   R
   R   R=   R>   R   R   (   R1   R   t   detailR   t   xauthor(    (    s%   /net/bzr/venus/planet/reconstitute.pyt   author   s     	c         C   sà  | p | i  o d Sd } d } |  i } | i | ƒ } t | i  t ƒ o | i  i d ƒ | _  n | i d ƒ p | i i	 ƒ  i
 d ƒ d j  o! t | i  ƒ | d <d | d <n | i i
 d	 ƒ d j oI | oA y- t i | | i  ƒ i } | i d d	 ƒ Wqd
 } qXn | i i
 d	 ƒ d j  p | oUt i d t i ƒ } | i | | i  d d ƒ}	 x!|	 i i D]}
 |
 i t i j o qyn |
 i d j o qyn xÕ |
 i D]Ê } | i t i j o qºn | i d j o qºn y^ | i ƒ  t | i ƒ d
 j o# | i i t i j o | i } n | } | i d d	 ƒ PWqº| i d d ƒ | i | i  i d ƒ ƒ } qºXqºWqyWn | o | i  | ƒ n | i! d ƒ o | i d | i" ƒ n |  i  | ƒ d S(   s.    insert a content-like element into the entry Ns2   <div xmlns="http://www.w3.org/1999/xhtml">%s</div>s   utf-8R8   t   htmli    R   s	   text/htmlt   xhtmli   t   treet   encodingt   bodyt   divt   languages   xml:lang(#   R   R>   R
   R   R   t   unicodeR#   R,   R8   R"   t   findR    R   t   parseStringt   documentElementR<   R   t
   HTMLParserR   t   TreeBuildert   parset
   childNodest   nodeTypeR   t   ELEMENT_NODEt   nodeNamet	   normalizet   lent
   firstChildt	   TEXT_NODER   R	   R   R=   RY   (   R1   R   RP   t   bozot   datat   xdivR   t   xcontentt   parserRS   RW   RX   (    (    s%   /net/bzr/venus/planet/reconstitute.pyR+      s`     	0!    
   
+ c         C   s‡   | p | o d St  |  d d	 d | ƒ } | i d
 d ƒ t  |  d d d | ƒ } | i d d ƒ |  i | ƒ |  i | ƒ d S(   s$    insert geo location into the entry Ns   %s:%st   geot   lats   %fs   xmlns:%ss(   http://www.w3.org/2003/01/geo/wgs84_pos#t   long(   s   geos   lats	   xmlns:geo(   s   geos   longs	   xmlns:geo(   R   R<   R   (   R1   Rp   Ro   t   xlatt   xlong(    (    s%   /net/bzr/venus/planet/reconstitute.pyt   locationÆ   s     c   	      C   sw  |  i  } t |  d | i d | i d d ƒ ƒ ƒ t |  d | i d d ƒ ƒ t |  d | i d d ƒ ƒ | i d ƒ o3 | i d ƒ o# t |  d | i i d d ƒ ƒ n x' | i d g  ƒ D] } t |  | ƒ qÀ Wt |  d | i d	 h  ƒ ƒ x* | i d
 g  ƒ D] } t |  d | ƒ qWt |  | ƒ t	 |  d | i d d ƒ | ƒ t	 |  d | i d d ƒ | ƒ t	 |  d | i d d ƒ | ƒ t
 |  d | i d t i ƒ  ƒ ƒ | o | | d <n | d j p | o d p d | d <n | i d ƒ o) | i d ƒ o t | d ƒ | d <n xM | i ƒ  D]? \ } } | i d ƒ o# t |  | i d d d ƒ | ƒ q0q0Wd S(   s&    copy source information to the entry R&   R'   t   icont   logot   imageR7   t   tagsRR   t   author_detailt   contributorst   contributort   rightst   rights_detailt   subtitlet   subtitle_detailR(   R-   t   updatedt   updated_parsedt   planet_formatt   truet   falset   planet_bozot   planet_names   planet_css-idt   planet_t   _t   :i   N(   R
   R   R=   R>   R,   Rv   RH   RR   R4   R+   RG   R@   t   gmtimeR%   t   itemsR   t   replace(	   t   xsourcet   sourceRi   t   formatR   RL   Rz   t   keyR   (    (    s%   /net/bzr/venus/planet/reconstitute.pyR   Ò   s8    	(!#  "  ! c         C   s‹  t  i d ƒ } | i } | i d t i ƒ | i d ƒ o | i d | i ƒ n. |  i i d ƒ o | i d |  i i ƒ n t	 | | ƒ t
 | | ƒ |  i } | i d ƒ p | i o | i | i d ƒ ƒ n t | d | i d d* ƒ | ƒ t | d | i d d* ƒ | ƒ t | d	 | i d	 d* g ƒ d
 | ƒ t | d | i d d* ƒ | ƒ t | d t |  i | t i ƒ  ƒ ƒ t | d | i d d* ƒ ƒ x' | i d g  ƒ D] } t | | ƒ q¯Wx™ d+ g D]Ž \ } } | i d | | i ƒ  f ƒ ob |  i i | ƒ oO t | d | | f | d | | i ƒ  f ƒ } | i d | |  i | ƒ qÐqÐW| i d ƒ oæ | i d g  ƒ i d ƒ oÊ | i d g  ƒ i d ƒ o® | i d g  ƒ }	 |	 i d d* ƒ }
 |	 i d d* ƒ } |
 d j o t | | d
 | d ƒ qX|
 d j p |
 d j p |
 d j o$ t | | d
 d
 | d
 d ƒ qXn | i d ƒ oH | i d ƒ o8 t | t | i d d* ƒ ƒ t | i d d* ƒ ƒ ƒ n | i d  h  ƒ } | o9 | i d! ƒ o( |  i i d" ƒ o |  i d" | d! <n t | d# | ƒ x* | i d$ g  ƒ D] } t | d% | ƒ q%W| i d& ƒ } | oi x; |  i i ƒ  D]* \ } } | i d' ƒ o | | | <qeqeW|  i i d( ƒ o |  i i	 | d) <qÇn
 |  i } | i d  h  ƒ } | p | i d! ƒ oc | i d! ƒ oR |  i i d" ƒ o? | o | i | i  ƒ  ƒ } n | | d  <|  i d" | d! <n | i d& ƒ } t! | | | |  i" ƒ | i | ƒ | S(,   s-    create an entry document from a parsed feed s-   <entry xmlns="http://www.w3.org/2005/Atom"/>
s   xmlns:planetRY   s   xml:langR(   R-   R*   R0   R+   i    R{   R|   R   t	   publishedt   published_parsedRw   t
   feedburnert   origLinks   %s_%ss   %s:%ss   xmlns:%st   whereR8   t   coordinatest   Pointi   t   Boxt
   LineStringt   Polygont   geo_latt   geo_longRx   R   R…   RR   Ry   Rz   R   R†   R&   t	   planet_idN(   s
   feedburnerR“   (#   R   R\   R]   R<   t   planett   xmlnsR,   RY   t   feedR&   R4   Ri   R(   R   R   R+   R=   R>   RG   t   entry_updatedR@   R‰   RH   R"   t
   namespacesR   Rs   t   floatRR   RŠ   R   t	   __class__t   copyR   t   version(   RŸ   R2   R   R1   Ri   RL   t   nsR   t	   xoriglinkR”   R8   R•   Rx   Rz   t   srcR   t
   src_authorRŒ   (    (    s%   /net/bzr/venus/planet/reconstitute.pyt   reconstituteö   sŠ    		&% 
  #'(8   	$ 
c         C   s`   | d f | d f |  d f f } x8 | D]0 \ } } | i  | ƒ o | | o	 | | Sq( W| S(   NR€   R‘   (   R,   (   RŸ   R2   t   defaultt   chkst   nodet   field(    (    s%   /net/bzr/venus/planet/reconstitute.pyR    L  s    		 (%   t   __doc__t   reR@   t   sgmllibt   xml.sax.saxutilsR    t   xml.domR   R   t   html5libR   t   html5lib.treebuildersR   R   RB   t   hashlibR   R   t   compilet   illegal_xml_charsR   R   R   t   UNICODER    R%   R&   R4   RG   RH   RR   R+   Rs   R   Rª   R>   R    (    (    (    s%   /net/bzr/venus/planet/reconstitute.pyt   <module>   s4   $									
		7		$	V
