| # 2011 March 15 |
| # |
| # The author disclaims copyright to this source code. In place of |
| # a legal notice, here is a blessing: |
| # |
| # May you do good and not evil. |
| # May you find forgiveness for yourself and forgive others. |
| # May you share freely, never taking more than you give. |
| # |
| #*********************************************************************** |
| # This file implements regression tests for SQLite library. |
| # |
| # This file checks to make sure SQLite is able to gracEFully |
| # handle malformed UTF-8. |
| # |
| |
| set testdir [file dirname $argv0] |
| source $testdir/tester.tcl |
| |
| proc utf8_to_ustr2 {s} { |
| set r "" |
| foreach i [split $s ""] { |
| scan $i %c c |
| append r [format \\u%04.4X $c] |
| } |
| set r |
| } |
| |
| proc utf8_to_hstr {in} { |
| regsub -all -- {(..)} $in {%[format "%s" \1]} out |
| subst $out |
| } |
| |
| proc utf8_to_xstr {in} { |
| regsub -all -- {(..)} $in {\\\\x[format "%s" \1]} out |
| subst $out |
| } |
| |
| proc utf8_to_ustr {in} { |
| regsub -all -- {(..)} $in {\\\\u[format "%04.4X" 0x\1]} out |
| subst $out |
| } |
| |
| do_test badutf2-1.0 { |
| db close |
| forcedelete test.db |
| sqlite3 db test.db |
| db eval "PRAGMA encoding = 'UTF-8'" |
| } {} |
| |
| do_test badutf2-4.0 { |
| set S [sqlite3_prepare_v2 db "SELECT ?" -1 dummy] |
| sqlite3_expired $S |
| } {0} |
| |
| foreach { i len uval xstr ustr u2u } { |
| 1 1 00 \x00 {} {} |
| 2 1 01 \x01 "\\u0001" 01 |
| 3 1 3F \x3F "\\u003F" 3F |
| 4 1 7F \x7F "\\u007F" 7F |
| 5 1 80 \x80 "\\u0080" C280 |
| 6 1 C3BF \xFF "\\u00FF" C3BF |
| 7 3 EFBFBD \xEF\xBF\xBD "\\uFFFD" {} |
| } { |
| |
| set hstr [ utf8_to_hstr $uval ] |
| |
| ifcapable bloblit { |
| if {$hstr != "%00"} { |
| do_test badutf2-2.1.$i { |
| set sql "SELECT '$hstr'=CAST(x'$uval' AS text) AS x;" |
| set res [ sqlite3_exec db $sql ] |
| lindex [ lindex $res 1] 1 |
| } {1} |
| do_test badutf2-2.2.$i { |
| set sql "SELECT CAST('$hstr' AS blob)=x'$uval' AS x;" |
| set res [ sqlite3_exec db $sql ] |
| lindex [ lindex $res 1] 1 |
| } {1} |
| } |
| do_test badutf2-2.3.$i { |
| set sql "SELECT hex(CAST(x'$uval' AS text)) AS x;" |
| set res [ sqlite3_exec db $sql ] |
| lindex [ lindex $res 1] 1 |
| } $uval |
| do_test badutf2-2.4.$i { |
| set sql "SELECT hex(CAST(x'$uval' AS text)) AS x;" |
| set res [ sqlite3_exec db $sql ] |
| lindex [ lindex $res 1] 1 |
| } $uval |
| } |
| |
| if {$hstr != "%00"} { |
| do_test badutf2-3.1.$i { |
| set sql "SELECT hex('$hstr') AS x;" |
| set res [ sqlite3_exec db $sql ] |
| lindex [ lindex $res 1] 1 |
| } $uval |
| } |
| |
| # Tcl 8.7 and later do automatic bad-utf8 correction for |
| # characters 0x80 thru 0x9f so test case 5 does not work here. |
| if {$i==5 && $tcl_version>=8.7} { |
| # no-op |
| } else { |
| do_test badutf2-4.1.$i { |
| sqlite3_reset $S |
| sqlite3_bind_text $S 1 $xstr $len |
| sqlite3_step $S |
| utf8_to_ustr2 [ sqlite3_column_text $S 0 ] |
| } $ustr |
| } |
| |
| ifcapable debug { |
| do_test badutf2-5.1.$i { |
| utf8_to_utf8 $uval |
| } $u2u |
| } |
| |
| } |
| |
| do_test badutf2-4.2 { |
| sqlite3_finalize $S |
| } {SQLITE_OK} |
| |
| |
| finish_test |