+ chg: force UCHARDET GB18030 detection to use CED's result, if applicable

+ cln: repository cleanup
This commit is contained in:
Rainer Kottenhoff 2019-03-09 11:10:41 +01:00
parent 1d215a2c92
commit c68c421812
16 changed files with 72 additions and 1770 deletions

1
.gitignore vendored
View File

@ -63,3 +63,4 @@ Thumbs.db
/np3portableapp/Notepad3Portable/App/Notepad3/x86/*.exe
/np3portableapp/Notepad3Portable/App/Notepad3/x86/lng
/np3portableapp/Notepad3Portable/App/themes/
/np3portableapp/Notepad3Portable/Other/Help/*.txt

View File

@ -1 +1 @@
1651
1653

File diff suppressed because it is too large Load Diff

View File

@ -1,308 +0,0 @@
Keyboard Shortcuts for Notepad3
File
Ctrl+N New file.
Ctrl+F4 Close file, identical with Ctrl+N.
Ctrl+O Open file.
F5 Reload file.
F8 Reload file without encoding detection.
Ctrl+F8 Reload file with Unicode detection toggled.
Shift+F8 Reload file with ANSI/UTF-8 defaults toggled.
Alt+F8 Reload file without file variable parsing.
Ctrl+S Save file.
F6 Save file as.
Ctrl+F6 Save file copy.
Ctrl+P Print file.
Ctrl+Alt+H Open recent file.
Tools
Alt+N Open document in new window.
Alt+0 Open new empty window.
Ctrl+M Run metapath file browser plugin.
Ctrl+L Launch document.
Alt+L Open with.
Ctrl+R Run command.
Favorites
Alt+I Open favorites.
Alt+K Add to favorites.
Alt+F9 Manage favorites.
Edit
Ctrl+Z Undo.
Ctrl+Shift+Z Redo.
Alt+Backspace Undo.
Ctrl+Y Redo.
Ctrl+Shift+Y Undo.
Ctrl+X Cut selection / Cut current line, if no selection.
Shift+Del Cut.
Ctrl+C Copy selection / Copy current line, if no selection.
Alt+C Copy all.
Ctrl+E Copy add.
Ctrl+V Paste.
Shift+Ins Paste.
Ctrl+K Swap.
Del Clear.
Ctrl+A Select all.
Alt+Shift+Arrows Rectangular selection.
Ctrl+Shift+Enter New line with toggled auto indent option. (orig-NP2: Ctrl+Enter)
Ctrl+PgUp/PgDn Goto previous/next block.
Ctrl+Shift+PgUp/PgDn Select to previous/next block.
Char, Word
Ctrl+Space Select word (or line).
Ctrl+Backspace Delete word before/left.
Ctrl+Del Delete word after/right.
Ctrl+Tab Insert tabulator.
Lines
Ctrl+Shift+Space Select line.
Ctrl+Shift+Up Move line (block) up.
Ctrl+Shift+Down Move line (block) down.
Ctrl+D Duplicate line.
Ctrl+Shift+X Cut line.
Ctrl+Shift+C Copy line.
Ctrl+Shift+D Delete line.
Ctrl+Shift+Backspace Delete line left.
Ctrl+Shift+Del Delete line right.
Ctrl+Shift+W Column wrap.
Ctrl+I Split lines.
Ctrl+J Join lines.
Ctrl+Shift+J Join paragraphs.
Block
Tab Indent selected block.
Shift+Tab Unindent selected block.
Alt+Q Enclose selection.
Alt+D Duplicate selection.
Alt+B Pad with spaces.
Alt+Z Strip first character.
Alt+U Strip last character.
Alt+W Strip trailing blanks.
Alt+P Compress whitespace.
Alt+R Remove blank lines.
Alt+M Modify lines.
Alt+O Sort lines.
Convert
Ctrl+Shift+U Make uppercase.
Ctrl+U Make lowercase.
Ctrl+Alt+I Invert case.
Ctrl+Alt+T Title case.
Ctrl+Alt+S Sentence case.
Ctrl+Shift+S Convert tabs to spaces.
Ctrl+Shift+T Convert spaces to tabs.
Ctrl+Shift+A Convert to ANSI.
Ctrl+Shift+O Convert to OEM.
Insert
Alt+X HTML/XML tag.
Ctrl+F5 Time/date (short form).
Ctrl+Shift+F5 Time/date (long form).
Ctrl+F9 Filename.
Ctrl+Shift+F9 Path and filename.
Ctrl+Shift+. Insert GUID.
Special
Ctrl+Q Block comment (toggle).
Ctrl+Shift+Q Stream comment.
Ctrl+Shift+E URL Encode.
Ctrl+Shift+R URL Decode.
Ctrl+Alt+E Escape C Special Chars.
Ctrl+Alt+U Unescape C Special Chars.
Ctrl+B Find matching brace.
Ctrl+Shift+B Select to matching brace.
Ctrl+1 Enclose within ''.
Ctrl+2 Enclose within "".
Ctrl+3 Enclose within ().
Ctrl+4 Enclose within [].
Ctrl+5 Enclose within {}.
Ctrl+6 Enclose within ``.
Shift+F5 Update timestamps.
Ctrl+, Jump to selection start.
Ctrl+. Jump to selection end.
Find, Replace
Ctrl+F Find.
Alt+F3 Save find text.
F3 Find next.
Shift+F3 Find previous.
Ctrl+F3 Find next word or selection.
Ctrl+Shift+F3 Find previous word or selection.
Ctrl+Alt+F2 Expand selection to next match. (orig-NP2: F2)
Ctrl+Alt+Shift+F2 Expand selection to previous match. (orig-NP2: Shift+F2)
Ctrl+H Replace.
F4 Replace next.
Ctrl+G Jump to line.
Bookmarks
F2 Jump to next bookmark.
Shift+F2 Jump to previous bookmark.
Ctrl+F2 Toggle bookmark.
Alt+F2 Clear all.
Syntax scheme, Font
F12 Select syntax scheme.
Shift+F12 Select 2nd default syntax scheme.
Ctrl+F12 Customize syntax schemes.
Alt+F12 Select default font.
F11 Select default text syntax scheme.
Ctrl+F11 Select web source code syntax scheme.
Shift+F11 Select XML document syntax scheme.
View
Ctrl+W Toggle word wrap.
Ctrl+Shift+L Show long line marker.
Ctrl+Shift+G Show indentation guides.
Ctrl+Shift+N Show line numbers.
Ctrl+Shift+M Show selection margin.
Ctrl+Shift+7 Show wrap symbols.
Ctrl+Shift+8 Show whitespace.
Ctrl+Shift+9 Show line endings.
Ctrl+Shift+V Toggle visual brace matching.
Ctrl+Shift+I Highlight current line.
Ctrl+Shift+Alt+F Activate/Deactivate code folding.
Ctrl+Shift+F Toggle all folds.
Zoom
Ctrl++ Zoom in.
Ctrl+- Zoom out.
Ctrl+0 Reset zoom.
Settings
Ctrl+T Tab settings.
Alt++ Increase limit for long lines.
Alt+- Decrease limit for long lines.
Ctrl+Shift+H Toggle auto close HTML/XML.
Alt+T Always on top.
Ctrl+Numpad_* Transparent mode.
Ctrl+9 Display text excerpt in title.
F7 Save settings now.
Ctrl+F7 Jump to ini-file.
Misc.
Esc Optionally minimize or exit Notepad3.
Shift+Esc Save file and exit Notepad3.
F1 Online Documentation.
Shift+F1 Display Info version "About…"
Regular Expression Syntax
. Matches any character
\( This marks the start of a region for tagging a match.
\) This marks the end of a tagged region.
\n Where n is 1 through 9 refers to the first through ninth
tagged region when replacing. For example, if the search
string was Fred\([1-9]\)XXX and the replace string was
Sam\1YYY, when applied to Fred2XXX this would generate
Sam2YYY.
\< This matches the start of a word.
\> This matches the end of a word.
\x This allows you to use a character x that would otherwise
have a special meaning. For example, \[ would be interpreted
as [ and not as the start of a character set.
[...] This indicates a set of characters, for example, [abc] means
any of the characters a, b or c. You can also use ranges, for
example [a-z] for any lower case character.
[^...] The complement of the characters in the set. For example,
[^A-Za-z] means any character except an alphabetic character.
^ This matches the start of a line (unless used inside a set,
see above).
$ This matches the end of a line.
* This matches 0 or more times. For example, Sa*m matches Sm,
Sam, Saam, Saaam and so on.
+ This matches 1 or more times. For example, Sa+m matches Sam,
Saam, Saaam and so on.
\d Any decimal digit.
\D Any character that is not a decimal digit.
\s Any whitespace character.
\S Any character that is not a whitespace character.
\w Any "word" character.
\W Any "non-word" character.
\xHH Character with hex code HH.
-----> Examples (don't use quotes)
- Quote lines: find "^" replace with "> "
- Unquote lines: find "^> " replace with ""
- Remove line numbers: find "^[0-9]+" replace with ""
- Convert tabs to double spaces: find "\t" replace with " "
Command Line Help
Usage:
Notepad3 [/?] […[Encoding]] […[Line ending mode]] [/e] [/g] [/m] [/l]
[/q] [/s] [/d] [/h] [/x] [/c] [/b] [/n] [/r] [/p] [/t] [/i] [/o]
[/f] [/u] [/v] [/vd] [/y] [/z] [[drive:][path]filename[…]]
file Must be the last argument, no quoted spaces by default.
+ Accept multiple file arguments (with quoted spaces).
- Accept single file argument (without quoted spaces).
Options:
/? Display this help message.
… Encoding (/ansi, /unicode, /unicodebe, /utf8, /utf8sig).
… Line ending mode (/crlf, /cr, /lf).
/e File source encoding.
/g Jump to specified position (/g -1 end of file).
/m Match specified text (/m- last, /mr regex, /mb backslash).
/l Auto-reload modified files.
/q Force creation of new files without prompt.
/s Select specified syntax scheme.
/d Select default text scheme.
/h Select Web Source Code scheme.
/x Select XML Document scheme.
/c Open new window and paste clipboard contents.
/b Open new paste board to collect clipboard entries.
/n Always open a new window (/ns single file instance).
/r Reuse window (/rs single file instance).
/p Set window position and size (/p0, /ps, /pf,l,t,r,b,m).
/t Set window title.
/i Start as tray icon.
/o Keep window on top.
/f Specify ini-file (/f0 no ini-file).
/u Launch with elevated privileges.
/v Print file immediately and quit.
/vd Print file (open printer dialog).
/y Search environment PATH in case of relative filename.
/z Skip next (usable for registry-based Notepad replacement).
Examples:
Notepad3 /utf8sig /crlf d:\temp\Test.txt
… Open a new file: "Test.txt" Encoding=UTF-8-BOM, EoL=CRLF.
Notepad3 /v d:\temp\Test.txt
… Print the file: "Test.txt" immediately.

View File

@ -1,51 +0,0 @@
================================================================================
= =
= Notepad3 - light-weight Scintilla-based text editor for Windows =
= =
= (c) Rizonesoft 2008-2019 =
= https://www.rizonesoft.com =
= =
================================================================================
Rizonesoft Notepad3 --- Licenses ---
================================================================================
--------------------------------------------------
License for Notepad3 and MiniPath
--------------------------------------------------
Copyright © 2008-2019 Rizonesoft,
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
3. Neither the name of Florian Balmer nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------
License for Scintilla and SciTE
--------------------------------------------------
Copyright (c) 1998-2002 by Neil Hodgson <neilh@scintilla.org>,
All Rights Reserved.
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that copyright notice and this permission notice appear in supporting documentation.
NEIL HODGSON DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL NEIL HODGSON BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
--------------------------------------------------
License for Onigmo (Oniguruma-mod) RegEx Engine
--------------------------------------------------
Copyright (c) 2002-2009 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>,
Copyright (c) 2011-2014 K.Takata <kentkt AT csc DOT jp>,
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -1,81 +0,0 @@
================================================================================
= =
= Notepad3 - light-weight Scintilla-based text editor for Windows =
= =
= (c) Rizonesoft 2008-2019 =
= https://www.rizonesoft.com =
= =
================================================================================
Rizonesoft Notepad3 --- README ---
================================================================================
--------------------------------------------------
Description
--------------------------------------------------
Notepad like text editor is based on the Scintilla source code.
Notepad3 is based on code from Notepad2 and MiniPath on code from metapath.
--------------------------------------------------
Changes compared to Flo's official Notepad2 (made in Notepad2-mod):
--------------------------------------------------
- Code folding
- Support for bookmarks
- Option to mark all occurrences of a word
- Updated Scintilla component
- Word auto-completion
- Syntax highlighting support for AutoHotkey (AHK), AutoIt3, AviSynth, Bash,
CMake, CoffeeScript, Inno Setup, LaTeX, Lua, Markdown, NSIS, Ruby, Tcl,
YAML and VHDL scripts.
- Improved support for NFO ANSI art
- Other various minor changes and tweaks
--------------------------------------------------
Changes compared to the Notepad2-mod fork:
--------------------------------------------------
- Additional syntax highlighting support for Awk, D, golang, MATLAB
- State of the art Regular Expression search engine (Onigmo)
- New toolbar icons based on Yusuke Kamiyamane's Fugue Icons
(Purchased by Rizonesoft)
- Hyperlink Hotspot highlighting
(single click Open in Browser (Ctrl) / Load in Editor (Alt))
- New program icon and other small cosmetic changes
- In-App support for AES-256 Rijndael encryption/decryption of files.
(incl. external commandline tool for batch processing)
- Virtual Space rectangular selection box (Alt-Key down)
- High-DPI awareness, including high definition toolbar icons
- Undo/Redo preserves selection
- File History preserves Caret position (optional)
and remembers encoding of file
- Accelerated word navigation
- Preserve caret position of items in file history
- Count occurrences of a marked selection or word
- Count and Mark occurrences of matching search/find expression
- Visual Studio style copy/paste current line (no selection)
- Insert GUIDs
- Dropped support for Windows XP version
- Other various minor changes, tweaks and bugfixes
--------------------------------------------------
Supported Operating Systems:
--------------------------------------------------
Windows 7, 8, 8.1 and 10 both 32-bit and 64-bit
--------------------------------------------------
Development:
--------------------------------------------------
- Florian 'Flo' Balmer (Notepad2) ( http://www.flos-freeware.ch )
- Rainer Kottenhoff ( https://github.com/RaiKoHoff )
--------------------------------------------------
Contributors:
--------------------------------------------------
- Derick Payne (© Rizonesoft) ( https://www.rizonesoft.com )
- XhmikosR (Notepad2-mod) ( https://xhmikosr.github.io/notepad2-mod )
- Kai Liu (CodeFolding) ( http://code.kliu.org/misc/notepad2 )
- RL Vision (Bookmarks) ( https://www.rlvision.com/notepad2/about.php )
- Aleksandar Lekov (MarkOcc./AutoCompl.)
- Bruno Barbieri
- Matthew Ellis (MinimizeToTray)
- Igal Tabachnik (RelaunchElevated) ( https://github.com/hmemcpy )
- Et alii @ GitHub ( https://github.com/XhmikosR/notepad2-mod/graphs/contributors )

View File

@ -1,119 +0,0 @@
Prototype enduser documentation
If you open a file which was encrypted by this program, you'll be prompted
to supply a passphrase. This passphrase becomes the default to be used
when saving files. The passphrase is not the actual encryption key, but
is used to generate a 256 bit encryption key called the file key.
Master Keys
An Encrypted file can optionally contain a copy of its own file key, encrypted
with a master key, derived from a master passphrase. This allows anyone who knows
the master passphrase to decode any file encrypted with any file key which uses this
master key structure. As long as the file passphrase is not changed, the master key
can be propagated to new versions of the file without typing the master phrase
again.
Why use a master key?
#1) Data recovery. It is not generally a good idea to use the same
passphrase for all files, or to continue using the same passphrase forever.
The principal hazard is that if even one key is revealed, every encrypted file
you have ever created can also be read. Consequently, different keys should be used
for different files, and over time, those keys should change. Since the
keys change, they are subject to being lost or forgotten, resulting in
lost data - it's still there, but no one can decode it. If you use a master key,
and you have misplaced the file key, you can still recover your data. Since the master
key is not routinely used to decrypt the file, and never needs to be shared with anyone,
it is much less likely to be compromised, so it is reasonable for a good master
key to be used for a very long time in a lot of different files.
Recommendation: Use a master passphrase that will be very hard for anyone to guess
and very hard for you to forget. For example:
"My Favorite movie is Gone with the Wind"
"When it rains in New York, even Chicago is a better place to be"
Never tell anyone what it is or write it down. It's an EMERGENCY measure to prevent
catastrophic data loss, so treat it that way. Never use it to read or open any encrypted
file except for testing, or if you have really lost the file passphrase.
#2) Trapdoor access. Sometimes it is desirable to allow a second party
to decode the file without knowing the encryption passphrase - a good example
is where an automated program is intended to read an encrypted file that is
prepared by a human. The program has to have the passphrase or the key embedded
in it somewhere. It's possible for a dedicated attacker to find it, but it's
much more likely that the source of compromise will be clumsy humans. Allowing
the program to use the master key and humans to use the ordinary file keys will
allow the routine-use keys to be changed as often as necessary, while still
letting the program read the files without being told the new key.
Technical Details:
Passphrase Management:
256 bit encryption keys are generated from ASCII passphrases by
passing the passphrase through a SHA256 hash. Passphrases are never
stored anywhere except in the dynamic memory of the encrypting program.
Key management over file generations: If the file is opened
using a file passphrase, the passphrase is retained and used
as the default for the passphrase dialog.
If the file is opened using a master passphrase, the recovered
file key is used as the default encryption for new files. This
allows an editor who does not know the file passphrase to propagate
a file key he could not create.
If the file contains a master key, and neither the file nor master
passphrase is changed, then the retained, master-encrypted file
key is copied into the next file generation. (It is still valid).
This allows an editor who knows only the file passphrase to propagate
a master key he could not create.
Encrypted files start with an 8 byte preamble, the first 4 bytes are
a "magic number" to identify the file type (currently 0x04030201)
and a 4 byte subfile type, (currently either 0x00000001 or 0x00000002)
The next 16 bytes are the initialization vector for the AES engine,
to be used with the file key. Each file gets a unique 16 bytes of
pseudorandom noise.
Next, for master keyed files, is a 16 byte IV for the master key,
followed by a 32 byte block containing the file key, encrypted
with the master key, using the master key IV and CBC block chaining.
Next, is the actual file data, encrypted using the file key and the IV,
and CBC block chaining.
Finally, are 1-16 bytes of padding to round out the AES block. Note that
there are never 0 bytes of padding.
A Word about pass phrases and overall security.
While this encryption scheme uses high quality AES encryption and quite long 256 bit keys,
that is almost irrelevant to the overall security of the system. It's like having a very
expensive lock on your front door. Thieves won't go to great lengths to pick your lock; they
will simply break a window instead. The weak link in this encryption scheme is YOU and your
selection of pass phrases. If your encrypted files are compromised, the most likely, by far,
method is the simplest; (1) they ask, you tell. or (2) they find the scrap of paper where you
wrote the passphrase or (3) some key logger watches you type the passphrase. The only
other likely method is a dictionary-type attack using a program to try lots of possible
passphrases. Any short, word-like passphrase CAN be compromised using a few days of computer
time.
Finally, consider the suitability of this encryption scheme for your purpose.
If your goal is to prevent your wife from reading your girlfriend's phone number
in your address book: definitely.
If your goal is to prevent disclosure of sensitive data if your laptop is stolen:
most likely.
If your goal is to prevent fishing by the IRS, should they ever become curious about you:
don't count on it. They'll throw you in jail until you tell them the password, or install a
key logger and wait for you to tell them voluntarily.
If your goal is to keep secrets from people who are definitely out to get you, and are willing
to shove bamboo splints under your fingernails until you talk: don't even think about it.

View File

@ -3,7 +3,7 @@
<assemblyIdentity
name="Notepad3"
processorArchitecture="*"
version="5.19.308.1651"
version="5.19.309.1653"
type="win32"
/>
<description>Notepad3 XpErImEnTaL</description>

View File

@ -31,7 +31,7 @@
#define STRSAFE_NO_DEPRECATE // don't allow deprecated functions
#include <strsafe.h>
#include <future> // async detection
//~#include <future> // async detection
#include "resource.h"
@ -691,6 +691,7 @@ extern "C" int Encoding_AnalyzeText
char encodingStrg_CED[MAX_ENC_STRG_LEN] = { '\0' };
int cpiEncoding_CED = CPI_NONE;
#if FALSE
size_t const largeFile = static_cast<size_t>(Settings2.FileLoadWarningMB) * 1024LL * 1024LL;
if (len < largeFile)
@ -710,23 +711,37 @@ extern "C" int Encoding_AnalyzeText
cpiEncoding_UCD = cpiUCD.get();
cpiEncoding_CED = cpiCED.get();
}
#else
// no need to run analyzers asynchron, cause they analyze only the first KB of large files ...
cpiEncoding_UCD = AnalyzeText_UCHARDET(text, len, encodingHint, &ucd_cnf, encodingStrg_UCD, MAX_ENC_STRG_LEN);
cpiEncoding_CED = AnalyzeText_CED(text, len, encodingHint, &ced_cnf, encodingStrg_CED, MAX_ENC_STRG_LEN);
#endif
float confidence = 0.0f;
float const ucd_confidence = ucd_cnf;
float const ced_confidence = ced_cnf;
#if 0
// --------------------------------------------------------------------------
// GB18030 (UCD always) to GBK detection adjustment
// --------------------------------------------------------------------------
if ((Encoding_GetCodePage(cpiEncoding_UCD) == 54936 /*GB-18030*/) &&
(Encoding_GetCodePage(cpiEncoding_CED) != 20936 /*GB-2312-80*/))
if (Encoding_GetCodePage(cpiEncoding_UCD) == 54936)
{
// CED (util/encodings/encoding.cc) changed to predict GB18030 if applicable
cpiEncoding_UCD = cpiEncoding_CED; // choose widely used encoding
switch (Encoding_GetCodePage(cpiEncoding_CED))
{
case 936: // GBK
case 20936: // GB-2312-80
cpiEncoding_UCD = cpiEncoding_CED; // use CED's choice
break;
case 54936:
// CED (util/encodings/encoding.cc) changed to predict GB18030 if applicable
default:
// keep GB-18030
break;
}
}
#endif
// --------------------------------------------------------------------------
// vote for encoding prognosis based on confidence levels or reliability

View File

@ -412,13 +412,13 @@
<ClCompile Include="..\uchardet\uchardet\src\nsEUCJPProber.cpp" />
<ClCompile Include="..\uchardet\uchardet\src\nsEUCKRProber.cpp" />
<ClCompile Include="..\uchardet\uchardet\src\nsEUCTWProber.cpp" />
<ClCompile Include="..\uchardet\uchardet\src\nsGB18030Prober.cpp">
<ClCompile Include="..\uchardet\uchardet\src\nsGB18030Prober.cpp" />
<ClCompile Include="..\uchardet\uchardet\src\nsGB2312Prober.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\uchardet\uchardet\src\nsGB2312Prober.cpp" />
<ClCompile Include="..\uchardet\uchardet\src\nsHebrewProber.cpp" />
<ClCompile Include="..\uchardet\uchardet\src\nsLatin1Prober.cpp" />
<ClCompile Include="..\uchardet\uchardet\src\nsMBCSGroupProber.cpp" />
@ -522,13 +522,13 @@
<ClInclude Include="..\uchardet\uchardet\src\nsEUCJPProber.h" />
<ClInclude Include="..\uchardet\uchardet\src\nsEUCKRProber.h" />
<ClInclude Include="..\uchardet\uchardet\src\nsEUCTWProber.h" />
<ClInclude Include="..\uchardet\uchardet\src\nsGB18030Prober.h">
<ClInclude Include="..\uchardet\uchardet\src\nsGB18030Prober.h" />
<ClInclude Include="..\uchardet\uchardet\src\nsGB2312Prober.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\uchardet\uchardet\src\nsGB2312Prober.h" />
<ClInclude Include="..\uchardet\uchardet\src\nsHebrewProber.h" />
<ClInclude Include="..\uchardet\uchardet\src\nsLatin1Prober.h" />
<ClInclude Include="..\uchardet\uchardet\src\nsMBCSGroupProber.h" />

View File

@ -7,8 +7,8 @@
#define SAPPNAME "Notepad3"
#define VERSION_MAJOR 5
#define VERSION_MINOR 19
#define VERSION_REV 308
#define VERSION_BUILD 1651
#define VERSION_REV 309
#define VERSION_BUILD 1653
#define SCINTILLA_VER 414
#define ONIGMO_REGEX_VER 6.2.0
#define VERSION_PATCH XpErImEnTaL

View File

@ -41,8 +41,8 @@
#include "Big5Freq.tab"
#include "EUCKRFreq.tab"
#include "EUCTWFreq.tab"
#include "GB2312Freq.tab"
//#include "GB18030Freq.tab"
//#include "GB2312Freq.tab"
#include "GB18030Freq.tab"
#define SURE_YES 0.99f
#define SURE_NO 0.01f
@ -80,20 +80,20 @@ EUCKRDistributionAnalysis::EUCKRDistributionAnalysis()
mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO;
}
// Constructor: wires the GB2312 analyser to its character-to-frequency-order
// table, that table's size, and the typical distribution ratio constant.
// NOTE(review): the GB2312* symbols presumably come from "GB2312Freq.tab" — confirm.
GB2312DistributionAnalysis::GB2312DistributionAnalysis()
{
mCharToFreqOrder = GB2312CharToFreqOrder;
mTableSize = GB2312_TABLE_SIZE;
mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO;
}
//GB18030DistributionAnalysis::GB18030DistributionAnalysis()
//GB2312DistributionAnalysis::GB2312DistributionAnalysis()
//{
// mCharToFreqOrder = GB18030CharToFreqOrder;
// mTableSize = GB18030_TABLE_SIZE;
// mTypicalDistributionRatio = GB18030_TYPICAL_DISTRIBUTION_RATIO;
// mCharToFreqOrder = GB2312CharToFreqOrder;
// mTableSize = GB2312_TABLE_SIZE;
// mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO;
//}
// Constructor: wires the GB18030 analyser to its character-to-frequency-order
// table, that table's size, and the typical distribution ratio constant.
// NOTE(review): the GB18030* symbols presumably come from "GB18030Freq.tab" — confirm.
GB18030DistributionAnalysis::GB18030DistributionAnalysis()
{
mCharToFreqOrder = GB18030CharToFreqOrder;
mTableSize = GB18030_TABLE_SIZE;
mTypicalDistributionRatio = GB18030_TYPICAL_DISTRIBUTION_RATIO;
}
Big5DistributionAnalysis::Big5DistributionAnalysis()
{
mCharToFreqOrder = Big5CharToFreqOrder;

View File

@ -40,7 +40,7 @@
#include "nscore.h"
#define ENOUGH_DATA_THRESHOLD 1024
#define ENOUGH_DATA_THRESHOLD 2048
#define MINIMUM_DATA_THRESHOLD 4
@ -158,23 +158,24 @@ protected:
}
};
// Character distribution analyser for GB2312-encoded text.
// Supplies the encoding-specific mapping from a two-byte character to a
// frequency-table index via GetOrder().
class GB2312DistributionAnalysis : public CharDistributionAnalysis
{
public:
GB2312DistributionAnalysis();
protected:
//for GB2312 encoding, we are interested
// first byte range: 0xb0 -- 0xfe
// second byte range: 0xa1 -- 0xfe
//no validation needed here. State machine has done that
//
// Maps the two bytes at str[0], str[1] to a linear index:
//   94 * (first - 0xb0) + (second - 0xa1)
// (94 is the number of second-byte values in 0xa1..0xfe).
// Returns -1 when either byte lies below the start of its range; the upper
// bounds are not checked here.
PRInt32 GetOrder(const char* str)
{
if ((unsigned char)* str >= (unsigned char)0xb0 && (unsigned char)str[1] >= (unsigned char)0xa1)
return 94 * ((unsigned char)str[0] - (unsigned char)0xb0) + (unsigned char)str[1] - (unsigned char)0xa1;
else
return -1;
}
};
//class GB2312DistributionAnalysis : public CharDistributionAnalysis
//{
//public:
// GB2312DistributionAnalysis();
//protected:
// //for GB2312 encoding, we are interested
// // first byte range: 0xa1 -- 0xf7
// // second byte range: 0xa1 -- 0xfe
//
// //no validation needed here. State machine has done that
// PRInt32 GetOrder(const char* str)
// {
// if ((unsigned char)* str >= (unsigned char)0xa1 && (unsigned char)str[1] >= (unsigned char)0xa1)
// return 94 * ((unsigned char)str[0] - (unsigned char)0xa1) + (unsigned char)str[1] - (unsigned char)0xa1;
// else
// return -1;
// }
//};
class GB18030DistributionAnalysis : public CharDistributionAnalysis
{

View File

@ -47,8 +47,8 @@ const char *ProberName[] =
"UTF-8",
"SJIS",
"EUC-JP",
"GB2312",
//"GB18030",
//"GB2312",
"GB18030",
"EUC-KR",
"Big5",
"EUC-TW",
@ -70,8 +70,8 @@ nsMBCSGroupProber::nsMBCSGroupProber(PRUint32 aLanguageFilter)
}
if (aLanguageFilter & NS_FILTER_CHINESE_SIMPLIFIED)
{
mProbers[++i] = new nsGB2312Prober(aLanguageFilter == NS_FILTER_CHINESE_SIMPLIFIED);
//~mProbers[++i] = new nsGB18030Prober(aLanguageFilter == NS_FILTER_CHINESE_SIMPLIFIED);
//mProbers[++i] = new nsGB2312Prober(aLanguageFilter == NS_FILTER_CHINESE_SIMPLIFIED);
mProbers[++i] = new nsGB18030Prober(aLanguageFilter == NS_FILTER_CHINESE_SIMPLIFIED);
}
if (aLanguageFilter & NS_FILTER_KOREAN)
{

View File

@ -42,8 +42,8 @@
#include "nsSJISProber.h"
#include "nsUTF8Prober.h"
#include "nsEUCJPProber.h"
#include "nsGB2312Prober.h"
//~#include "nsGB18030Prober.h"
//#include "nsGB2312Prober.h"
#include "nsGB18030Prober.h"
#include "nsEUCKRProber.h"
#include "nsBig5Prober.h"
#include "nsEUCTWProber.h"

View File

@ -262,7 +262,7 @@ const SMModel EUCTWSMModel = {
// GB-2312
#if 0
static PRUint32 GB2312_cls [ 256 / 8 ] = {
//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07
@ -314,6 +314,7 @@ const SMModel GB2312SMModel = {
GB2312CharLenTable,
"GB2312",
};
#endif
// GB-18030