root/src/xml.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. libxml2_loaded_p
  2. load_dll_functions
  3. libxml2_loaded_p
  4. init_libxml2_functions
  5. make_dom
  6. parse_region
  7. xml_cleanup_parser
  8. DEFUN
  9. syms_of_xml

     1 /* Interface to libxml2.
     2    Copyright (C) 2010-2023 Free Software Foundation, Inc.
     3 
     4 This file is part of GNU Emacs.
     5 
     6 GNU Emacs is free software: you can redistribute it and/or modify
     7 it under the terms of the GNU General Public License as published by
     8 the Free Software Foundation, either version 3 of the License, or (at
     9 your option) any later version.
    10 
    11 GNU Emacs is distributed in the hope that it will be useful,
    12 but WITHOUT ANY WARRANTY; without even the implied warranty of
    13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    14 GNU General Public License for more details.
    15 
    16 You should have received a copy of the GNU General Public License
    17 along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
    18 
    19 #include <config.h>
    20 
    21 #include "lisp.h"
    22 #include "buffer.h"
    23 
    24 #ifdef HAVE_LIBXML2
    25 
    26 #include <libxml/tree.h>
    27 #include <libxml/parser.h>
    28 #include <libxml/HTMLparser.h>
    29 
    30 
    31 #ifdef WINDOWSNT
    32 
    33 # include <windows.h>
    34 # include "w32common.h"
    35 # include "w32.h"
    36 
    37 DEF_DLL_FN (htmlDocPtr, htmlReadMemory,
    38              (const char *, int, const char *, const char *, int));
    39 DEF_DLL_FN (xmlDocPtr, xmlReadMemory,
    40              (const char *, int, const char *, const char *, int));
    41 DEF_DLL_FN (xmlNodePtr, xmlDocGetRootElement, (xmlDocPtr));
    42 DEF_DLL_FN (void, xmlFreeDoc, (xmlDocPtr));
    43 DEF_DLL_FN (void, xmlCleanupParser, (void));
    44 DEF_DLL_FN (void, xmlCheckVersion, (int));
    45 
    46 static bool
    47 libxml2_loaded_p (void)
    48 {
    49   Lisp_Object found = Fassq (Qlibxml2, Vlibrary_cache);
    50 
    51   return CONSP (found) && EQ (XCDR (found), Qt);
    52 }
    53 
    54 # undef htmlReadMemory
    55 # undef xmlCheckVersion
    56 # undef xmlCleanupParser
    57 # undef xmlDocGetRootElement
    58 # undef xmlFreeDoc
    59 # undef xmlReadMemory
    60 
    61 # define htmlReadMemory fn_htmlReadMemory
    62 # define xmlCheckVersion fn_xmlCheckVersion
    63 # define xmlCleanupParser fn_xmlCleanupParser
    64 # define xmlDocGetRootElement fn_xmlDocGetRootElement
    65 # define xmlFreeDoc fn_xmlFreeDoc
    66 # define xmlReadMemory fn_xmlReadMemory
    67 
    68 static bool
    69 load_dll_functions (HMODULE library)
    70 {
    71   LOAD_DLL_FN (library, htmlReadMemory);
    72   LOAD_DLL_FN (library, xmlReadMemory);
    73   LOAD_DLL_FN (library, xmlDocGetRootElement);
    74   LOAD_DLL_FN (library, xmlFreeDoc);
    75   LOAD_DLL_FN (library, xmlCleanupParser);
    76   LOAD_DLL_FN (library, xmlCheckVersion);
    77   return true;
    78 }
    79 
    80 #else  /* !WINDOWSNT */
    81 
    82 static bool
    83 libxml2_loaded_p (void)
    84 {
    85   return true;
    86 }
    87 
    88 #endif  /* !WINDOWSNT */
    89 
    90 static bool
    91 init_libxml2_functions (void)
    92 {
    93 #ifdef WINDOWSNT
    94   if (libxml2_loaded_p ())
    95     return true;
    96   else
    97     {
    98       HMODULE library;
    99 
   100       if (!(library = w32_delayed_load (Qlibxml2)))
   101         {
   102           message1 ("libxml2 library not found");
   103           return false;
   104         }
   105 
   106       if (! load_dll_functions (library))
   107         goto bad_library;
   108 
   109       Vlibrary_cache = Fcons (Fcons (Qlibxml2, Qt), Vlibrary_cache);
   110       return true;
   111     }
   112 
   113  bad_library:
   114   Vlibrary_cache = Fcons (Fcons (Qlibxml2, Qnil), Vlibrary_cache);
   115 
   116   return false;
   117 #else  /* !WINDOWSNT */
   118   return true;
   119 #endif  /* !WINDOWSNT */
   120 }
   121 
   122 static Lisp_Object
   123 make_dom (xmlNode *node)
   124 {
   125   if (node->type == XML_ELEMENT_NODE)
   126     {
   127       Lisp_Object result = list1 (intern ((char *) node->name));
   128       xmlNode *child;
   129       xmlAttr *property;
   130       Lisp_Object plist = Qnil;
   131 
   132       /* First add the attributes. */
   133       property = node->properties;
   134       while (property != NULL)
   135         {
   136           if (property->children &&
   137               property->children->content)
   138             {
   139               char *content = (char *) property->children->content;
   140               plist = Fcons (Fcons (intern ((char *) property->name),
   141                                     build_string (content)),
   142                              plist);
   143             }
   144           property = property->next;
   145         }
   146       result = Fcons (Fnreverse (plist), result);
   147 
   148       /* Then add the children of the node. */
   149       child = node->children;
   150       while (child != NULL)
   151         {
   152           result = Fcons (make_dom (child), result);
   153           child = child->next;
   154         }
   155 
   156       return Fnreverse (result);
   157     }
   158   else if (node->type == XML_TEXT_NODE || node->type == XML_CDATA_SECTION_NODE)
   159     {
   160       if (node->content)
   161         return build_string ((char *) node->content);
   162       else
   163         return Qnil;
   164     }
   165   else if (node->type == XML_COMMENT_NODE)
   166     {
   167       if (node->content)
   168         return list3 (intern ("comment"), Qnil,
   169                       build_string ((char *) node->content));
   170       else
   171         return Qnil;
   172     }
   173   else
   174     return Qnil;
   175 }
   176 
   177 static Lisp_Object
   178 parse_region (Lisp_Object start, Lisp_Object end, Lisp_Object base_url,
   179               Lisp_Object discard_comments, bool htmlp)
   180 {
   181   xmlDoc *doc;
   182   Lisp_Object result = Qnil;
   183   const char *burl = "";
   184   ptrdiff_t istart, iend, istart_byte, iend_byte;
   185   unsigned char *buftext;
   186 
   187   xmlCheckVersion (LIBXML_VERSION);
   188 
   189   if (NILP (start))
   190     start = Fpoint_min ();
   191 
   192   if (NILP (end))
   193     end = Fpoint_max ();
   194 
   195   validate_region (&start, &end);
   196 
   197   istart = XFIXNUM (start);
   198   iend = XFIXNUM (end);
   199   istart_byte = CHAR_TO_BYTE (istart);
   200   iend_byte = CHAR_TO_BYTE (iend);
   201 
   202   if (istart < GPT && GPT < iend)
   203     move_gap_both (iend, iend_byte);
   204 
   205   if (! NILP (base_url))
   206     {
   207       CHECK_STRING (base_url);
   208       burl = SSDATA (base_url);
   209     }
   210 
   211   buftext = BYTE_POS_ADDR (istart_byte);
   212 #ifdef REL_ALLOC
   213   /* Prevent ralloc.c from relocating the current buffer while libxml2
   214      functions below read its text.  */
   215   r_alloc_inhibit_buffer_relocation (1);
   216 #endif
   217   if (htmlp)
   218     doc = htmlReadMemory ((char *)buftext,
   219                           iend_byte - istart_byte, burl, "utf-8",
   220                           HTML_PARSE_RECOVER|HTML_PARSE_NONET|
   221                           HTML_PARSE_NOWARNING|HTML_PARSE_NOERROR|
   222                           HTML_PARSE_NOBLANKS);
   223   else
   224     doc = xmlReadMemory ((char *)buftext,
   225                          iend_byte - istart_byte, burl, "utf-8",
   226                          XML_PARSE_NONET|XML_PARSE_NOWARNING|
   227                          XML_PARSE_NOBLANKS |XML_PARSE_NOERROR);
   228 
   229 #ifdef REL_ALLOC
   230   r_alloc_inhibit_buffer_relocation (0);
   231 #endif
   232   /* If the assertion below fails, malloc was called inside the above
   233      libxml2 functions, and ralloc.c caused relocation of buffer text,
   234      so we could have read from unrelated memory.  */
   235   eassert (buftext == BYTE_POS_ADDR (istart_byte));
   236 
   237   if (doc != NULL)
   238     {
   239       Lisp_Object r = Qnil;
   240       if (NILP(discard_comments))
   241         {
   242           /* If the document has toplevel comments, then this should
   243              get us the nodes and the comments. */
   244           xmlNode *n = doc->children;
   245 
   246           while (n) {
   247             if (!NILP (r))
   248               result = Fcons (r, result);
   249             r = make_dom (n);
   250             n = n->next;
   251           }
   252         }
   253 
   254       if (NILP (result)) {
   255         /* The document doesn't have toplevel comments or we discarded
   256            them.  Get the tree the proper way. */
   257         xmlNode *node = xmlDocGetRootElement (doc);
   258         if (node != NULL)
   259           result = make_dom (node);
   260       } else
   261         result = Fcons (Qtop, Fcons (Qnil, Fnreverse (Fcons (r, result))));
   262 
   263       xmlFreeDoc (doc);
   264     }
   265 
   266   return result;
   267 }
   268 
   269 void
   270 xml_cleanup_parser (void)
   271 {
   272   if (libxml2_loaded_p ())
   273     xmlCleanupParser ();
   274 }
   275 
   276 DEFUN ("libxml-parse-html-region", Flibxml_parse_html_region,
   277        Slibxml_parse_html_region,
   278        0, 4, 0,
   279        doc: /* Parse the region as an HTML document and return the parse tree.
   280 If START is nil, it defaults to `point-min'.  If END is nil, it
   281 defaults to `point-max'.
   282 
   283 If BASE-URL is non-nil, it is used if and when reporting errors and
   284 warnings from the underlying libxml2 library.  Currently, errors and
   285 warnings from the library are suppressed, so this argument is largely
   286 ignored.
   287 
   288 If you want comments to be stripped, use the `xml-remove-comments'
   289 function to strip comments before calling this function.  */)
   290   (Lisp_Object start, Lisp_Object end, Lisp_Object base_url, Lisp_Object discard_comments)
   291 {
   292   if (init_libxml2_functions ())
   293     return parse_region (start, end, base_url, discard_comments, true);
   294   return Qnil;
   295 }
   296 
   297 DEFUN ("libxml-parse-xml-region", Flibxml_parse_xml_region,
   298        Slibxml_parse_xml_region,
   299        0, 4, 0,
   300        doc: /* Parse the region as an XML document and return the parse tree.
   301 If START is nil, it defaults to `point-min'.  If END is nil, it
   302 defaults to `point-max'.
   303 
   304 If BASE-URL is non-nil, it is used if and when reporting errors and
   305 warnings from the underlying libxml2 library.  Currently, errors and
   306 warnings from the library are suppressed, so this argument is largely
   307 ignored.
   308 
   309 If you want comments to be stripped, use the `xml-remove-comments'
   310 function to strip comments before calling this function.  */)
   311   (Lisp_Object start, Lisp_Object end, Lisp_Object base_url, Lisp_Object discard_comments)
   312 {
   313   if (init_libxml2_functions ())
   314     return parse_region (start, end, base_url, discard_comments, false);
   315   return Qnil;
   316 }
   317 #endif /* HAVE_LIBXML2 */
   318 
   319 
   320 
   321 DEFUN ("libxml-available-p", Flibxml_available_p, Slibxml_available_p, 0, 0, 0,
   322        doc: /* Return t if libxml2 support is available in this instance of Emacs.*/)
   323   (void)
   324 {
   325 #ifdef HAVE_LIBXML2
   326 # ifdef WINDOWSNT
   327   Lisp_Object found = Fassq (Qlibxml2, Vlibrary_cache);
   328   if (CONSP (found))
   329     return XCDR (found);
   330   else
   331     {
   332       Lisp_Object status;
   333       status = init_libxml2_functions () ? Qt : Qnil;
   334       Vlibrary_cache = Fcons (Fcons (Qlibxml2, status), Vlibrary_cache);
   335       return status;
   336     }
   337 # else
   338   return Qt;
   339 # endif /* WINDOWSNT */
   340 #else
   341   return Qnil;
   342 #endif  /* HAVE_LIBXML2 */
   343 }
   344 
   345 /***********************************************************************
   346                             Initialization
   347  ***********************************************************************/
   348 void
   349 syms_of_xml (void)
   350 {
   351 #ifdef HAVE_LIBXML2
   352   defsubr (&Slibxml_parse_html_region);
   353   defsubr (&Slibxml_parse_xml_region);
   354 #endif
   355   defsubr (&Slibxml_available_p);
   356 }

/* [<][>][^][v][top][bottom][index][help] */