Fix for negated class escape misinterpretation in regex character classes

  See the following bug entry:

  http://sourceforge.net/tracker/index.php?func=detail&aid=1760116&group_id=11005&atid=111005
	    [ 1760116 ] Negated escape sequences misinterpreted in character class

  This fix simply adds a few more character tables so that the characters of a
  negated class escape can be added to a []-bracketed custom character class,
  just as is already done for the positive class escapes.

  Interestingly, I notice that (?n\W) does not match newlines, whereas with my
  patch (?n[\W]) does, which is rather inconsistent. The same is true of \L
  and \D. Also, \y without (?n ) around it will match a newline. I believe
  these to be faults.

diff -u nedit_official/source/regularExp.c nedit_mod/source/regularExp.c
--- nedit_official/source/regularExp.c	2006-08-13 23:47:45.000000000 +0200
+++ nedit_mod/source/regularExp.c	2007-07-26 12:58:20.852041600 +0200
@@ -486,6 +486,14 @@
 
 static unsigned char  ASCII_Digits [] = "0123456789"; /* Same for all */
                                                       /* locales.     */
+
+static unsigned char  NWhite_Space [ALNUM_CHAR_SIZE]; /* negated classes */
+static unsigned char  NWord_Char   [ALNUM_CHAR_SIZE];
+static unsigned char  NLetter_Char [ALNUM_CHAR_SIZE];
+
+static unsigned char  NASCII_Digits [ALNUM_CHAR_SIZE];/* Same for all */
+                                                      /* locales.     */
+
 static int            Is_Case_Insensitive;
 static int            Match_Newline;
 
@@ -2277,97 +2285,70 @@
       return ret_val; /* Just checking if this is a valid shortcut escape. */
    }
 
-   switch (c) {
-      case 'd':
-      case 'D':
-         if (emit == EMIT_CLASS_BYTES) {
-            class = ASCII_Digits;
-         } else if (emit == EMIT_NODE) {
-            ret_val = (islower (c) ? emit_node (DIGIT)
-                                   : emit_node (NOT_DIGIT));
-         }
-
-         break;
-
-      case 'l':
-      case 'L':
-         if (emit == EMIT_CLASS_BYTES) {
-            class = Letter_Char;
-         } else if (emit == EMIT_NODE) {
-            ret_val = (islower (c) ? emit_node (LETTER)
-                                   : emit_node (NOT_LETTER));
-         }
-
-         break;
-
-      case 's':
-      case 'S':
-         if (emit == EMIT_CLASS_BYTES) {
-            if (Match_Newline) emit_byte ('\n');
-
-            class = White_Space;
-         } else if (emit == EMIT_NODE) {
-            if (Match_Newline) {
-               ret_val = (islower (c) ? emit_node (SPACE_NL)
-                                      : emit_node (NOT_SPACE_NL));
-            } else {
-               ret_val = (islower (c) ? emit_node (SPACE)
-                                      : emit_node (NOT_SPACE));
-            }
-         }
-
-         break;
-
-      case 'w':
-      case 'W':
-         if (emit == EMIT_CLASS_BYTES) {
-            class = Word_Char;
-         } else if (emit == EMIT_NODE) {
-            ret_val = (islower (c) ? emit_node (WORD_CHAR)
-                                   : emit_node (NOT_WORD_CHAR));
-         }
-
-         break;
-
-      /* Since the delimiter table is not available at regex compile time \B,
-         \Y and \Y can only generate a node.  At run time, the delimiter table
-         will be available for these nodes to use. */
-
-      case 'y':
-
-         if (emit == EMIT_NODE) {
-            ret_val = emit_node (IS_DELIM);
-         } else {
-            REG_FAIL ("internal error #5 `shortcut_escape\'");
-         }
-
-         break;
-
-      case 'Y':
-
-         if (emit == EMIT_NODE) {
-            ret_val = emit_node (NOT_DELIM);
-         } else {
-            REG_FAIL ("internal error #6 `shortcut_escape\'");
+   if (emit == EMIT_CLASS_BYTES) {
+      /* we need to add '\n' for classes non-digit \D, non-letter \L,
+         non-word \W and whitespace \s if matching newlines */
+      if (Match_Newline) {
+         switch (c) {
+            case 'D':
+            case 'L':
+            case 's':
+            case 'W':
+               emit_byte ('\n');
+               break;
+            default:
+               break;
          }
+      }
+      /* now provide all the extra characters required for the class */
+      switch (c) {
+         case 'd': class = ASCII_Digits;  break;
+         case 'D': class = NASCII_Digits; break;
+         case 'l': class = Letter_Char;   break;
+         case 'L': class = NLetter_Char;  break;
+         case 's': class = White_Space;   break;
+         case 'S': class = NWhite_Space;  break;
+         case 'w': class = Word_Char;     break;
+         case 'W': class = NWord_Char;    break;
+
+         /* Since the delimiter table is not available at regex compile time
+            \B, \y and \Y can only generate a node.  At run time, the delimiter
+            table will be available for these nodes to use. */
+         case 'y': REG_FAIL ("internal error #5 `shortcut_escape\'");
+         case 'Y': REG_FAIL ("internal error #6 `shortcut_escape\'");
+         case 'B': REG_FAIL ("internal error #7 `shortcut_escape\'");
+         default:  REG_FAIL ("internal error #8 `shortcut_escape\'");
+      }
+   }
+   else if (emit == EMIT_NODE) {
+      switch (c) {
+         case 'd': ret_val = emit_node (DIGIT);          break;
+         case 'D': ret_val = emit_node (NOT_DIGIT);      break;
+
+         case 'l': ret_val = emit_node (LETTER);         break;
+         case 'L': ret_val = emit_node (NOT_LETTER);     break;
+
+         case 's': ret_val = Match_Newline ? emit_node (SPACE_NL)
+                                           : emit_node (SPACE);
+                   break;
+         case 'S': ret_val = Match_Newline ? emit_node (NOT_SPACE_NL)
+                                           : emit_node (NOT_SPACE);
+                   break;
 
-         break;
+         case 'w': ret_val = emit_node (WORD_CHAR);      break;
+         case 'W': ret_val = emit_node (NOT_WORD_CHAR);  break;
 
-      case 'B':
+         case 'y': ret_val = emit_node (IS_DELIM);       break;
+         case 'Y': ret_val = emit_node (NOT_DELIM);      break;
 
-         if (emit == EMIT_NODE) {
-            ret_val = emit_node (NOT_BOUNDARY);
-         } else {
-            REG_FAIL ("internal error #7 `shortcut_escape\'");
-         }
+         case 'B': ret_val = emit_node (NOT_BOUNDARY);   break;
 
-         break;
-
-      default:
-         /* We get here if there isn't a case for every character in
-            the string "codes" */
+         default:
+            /* We get here if there isn't a case for every character in
+               the string "codes" */
 
-         REG_FAIL ("internal error #8 `shortcut_escape\'");
+            REG_FAIL ("internal error #8 `shortcut_escape\'");
+      }
    }
 
    if (emit == EMIT_NODE  &&  c != 'B') {
@@ -2935,46 +2916,73 @@
 
    static int initialized = 0;
    static int underscore = (int) '_';
-          int i, word_count, letter_count, space_count;
+          int i, word_count, letter_count, space_count,
+                 nword_count, nletter_count, nspace_count, ndigit_count;
 
    if (!initialized) {
       initialized  = 1; /* Only need to generate character sets once. */
       word_count   = 0;
       letter_count = 0;
       space_count  = 0;
+      nword_count   = 0;
+      nletter_count = 0;
+      nspace_count  = 0;
+      ndigit_count  = 0;
+
+      /* for every 8-bit value except '\0' == 0 */
+      for (i = 1; i <= (int)UCHAR_MAX; i++) {
+         /* Note: Whether or not newline is considered to be a member of a class
+            is handled by switches within the original regex and is thus omitted
+            here. */
+         if (i == (int) '\n')
+            continue;
 
-      for (i = 1; i < (int)UCHAR_MAX; i++) {
          if (isalnum (i) || i == underscore) {
             Word_Char [word_count++] = (unsigned char) i;
+         } else {
+            NWord_Char [nword_count++] = (unsigned char) i;
          }
 
          if (isalpha (i)) {
             Letter_Char [letter_count++] = (unsigned char) i;
+         } else {
+            NLetter_Char [nletter_count++] = (unsigned char) i;
          }
 
-         /* Note: Whether or not newline is considered to be whitespace is
-            handled by switches within the original regex and is thus omitted
-            here. */
-
-         if (isspace (i) && (i != (int) '\n')) {
+         if (isspace (i)) {
             White_Space [space_count++] = (unsigned char) i;
+         } else {
+            NWhite_Space [nspace_count++] = (unsigned char) i;
+         }
+
+         if (strchr ((char *) ASCII_Digits, i) == NULL) {
+            NASCII_Digits[ndigit_count++] = (unsigned char) i;
          }
 
          /* Make sure arrays are big enough.  ("- 2" because of zero array
             origin and we need to leave room for the NULL terminator.) */
 
-         if (word_count   > (ALNUM_CHAR_SIZE  - 2) ||
-             space_count  > (WHITE_SPACE_SIZE - 2) ||
-             letter_count > (ALNUM_CHAR_SIZE  - 2)) {
+         if (word_count    > (ALNUM_CHAR_SIZE  - 2) ||
+             space_count   > (WHITE_SPACE_SIZE - 2) ||
+             letter_count  > (ALNUM_CHAR_SIZE  - 2) ||
+             nword_count   > (ALNUM_CHAR_SIZE  - 2) ||
+             nspace_count  > (ALNUM_CHAR_SIZE  - 2) ||
+             nletter_count > (ALNUM_CHAR_SIZE  - 2) ||
+             ndigit_count  > (ALNUM_CHAR_SIZE  - 2)) {
 
             reg_error ("internal error #9 `init_ansi_classes\'");
             return (0);
          }
       }
 
-      Word_Char   [word_count]  = '\0';
-      Letter_Char [word_count]  = '\0';
-      White_Space [space_count] = '\0';
+      Word_Char     [word_count]    = '\0';
+      Letter_Char   [letter_count]  = '\0';
+      White_Space   [space_count]   = '\0';
+
+      NWord_Char    [nword_count]   = '\0';
+      NLetter_Char  [nletter_count] = '\0';
+      NWhite_Space  [nspace_count]  = '\0';
+      NASCII_Digits [ndigit_count]  = '\0';
    }
 
    return (1);