[web] implement in-house sha1 hash function (#18651)

* implement in-house sha1 hash function

* add comment

* add test description

Co-authored-by: Antoine Clausse <antoine.clausse@overleaf.com>

* add more tests

* dedupe test strings and remove unneeded toString calls

---------

Co-authored-by: Antoine Clausse <antoine.clausse@overleaf.com>
GitOrigin-RevId: aa3f5ab3ff412696c715521381eebf176f503197
This commit is contained in:
Liangjun Song 2024-06-04 11:50:23 +01:00 committed by Copybot
parent a9cc32e6ed
commit 4101f4efeb
6 changed files with 227 additions and 9 deletions

View file

@ -1,5 +1,5 @@
// @ts-ignore // @ts-ignore
import CryptoJSSHA1 from 'crypto-js/sha1' import { generateSHA1Hash } from '../../../shared/utils/sha1'
import { import {
createContext, createContext,
useContext, useContext,
@ -94,9 +94,9 @@ export const ReferencesProvider: FC = ({ children }) => {
const docId = doc.doc_id const docId = doc.doc_id
const snapshot = doc._doc.snapshot const snapshot = doc._doc.snapshot
const now = Date.now() const now = Date.now()
const sha1 = CryptoJSSHA1( const sha1 = generateSHA1Hash(
'blob ' + snapshot.length + '\x00' + snapshot 'blob ' + snapshot.length + '\x00' + snapshot
).toString() )
const CACHE_LIFETIME = 6 * 3600 * 1000 // allow reindexing every 6 hours const CACHE_LIFETIME = 6 * 3600 * 1000 // allow reindexing every 6 hours
const cacheEntry = existingIndexHash[docId] const cacheEntry = existingIndexHash[docId]
const isCached = const isCached =

View file

@ -13,7 +13,7 @@ import _ from 'lodash'
* DS207: Consider shorter variations of null checks * DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/ */
import CryptoJSSHA1 from 'crypto-js/sha1' import { generateSHA1Hash } from '../../shared/utils/sha1'
let ReferencesManager let ReferencesManager
export default ReferencesManager = class ReferencesManager { export default ReferencesManager = class ReferencesManager {
@ -75,9 +75,7 @@ export default ReferencesManager = class ReferencesManager {
const docId = doc.doc_id const docId = doc.doc_id
const snapshot = doc._doc.snapshot const snapshot = doc._doc.snapshot
const now = Date.now() const now = Date.now()
const sha1 = CryptoJSSHA1( const sha1 = generateSHA1Hash('blob ' + snapshot.length + '\x00' + snapshot)
'blob ' + snapshot.length + '\x00' + snapshot
).toString()
const CACHE_LIFETIME = 6 * 3600 * 1000 // allow reindexing every 6 hours const CACHE_LIFETIME = 6 * 3600 * 1000 // allow reindexing every 6 hours
const cacheEntry = this.existingIndexHash[docId] const cacheEntry = this.existingIndexHash[docId]
const isCached = const isCached =

View file

@ -0,0 +1,41 @@
/**
* From: https://github.com/pvorb/node-crypt/blob/master/crypt.js
* Copyright © 2011, Paul Vorbach. All rights reserved.
* Copyright © 2009, Jeff Mott. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice, this list of
* conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
* 3. Neither the name Crypto-JS nor the names of its contributors may be used to endorse or
* promote products derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
* IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
 * Pack a sequence of bytes into big-endian 32-bit words.
 *
 * Each group of four bytes fills one word, most significant byte first. A
 * trailing group of fewer than four bytes leaves the low-order bits of the
 * last word at zero. Words whose top bit is set come out as negative numbers
 * because JavaScript bitwise operators produce signed 32-bit results.
 *
 * @param {ArrayLike<number>} bytes - byte values to pack
 * @returns {number[]} the packed words
 */
export function bytesToWords(bytes) {
  const words = []
  let index = 0
  while (index < bytes.length) {
    const wordIndex = index >>> 2
    // Byte 0 of each word is shifted into bits 31..24, byte 3 into bits 7..0.
    const shift = 24 - 8 * (index % 4)
    words[wordIndex] = words[wordIndex] | (bytes[index] << shift)
    index += 1
  }
  return words
}
/**
 * Unpack big-endian 32-bit words into a flat array of bytes.
 *
 * Every word contributes exactly four bytes, most significant byte first.
 *
 * @param {number[]} words - 32-bit words (signed or unsigned)
 * @returns {number[]} byte values in the range 0..255
 */
export function wordsToBytes(words) {
  const bytes = []
  for (let w = 0; w < words.length; w++) {
    const word = words[w]
    // Walk the word from its top byte down to its bottom byte.
    for (let shift = 24; shift >= 0; shift -= 8) {
      bytes.push((word >>> shift) & 0xff)
    }
  }
  return bytes
}

View file

@ -0,0 +1,99 @@
/**
* From https://github.com/pvorb/node-sha1/blob/master/sha1.js
* Copyright © 2009, Jeff Mott. All rights reserved.
* Copyright © 2011, Paul Vorbach. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice, this
* list of conditions and the following disclaimer in the documentation and/or
* other materials provided with the distribution.
* 3. Neither the name Crypto-JS nor the names of its contributors may be used to
* endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
import { wordsToBytes, bytesToWords } from './crypto'
export function generateSHA1Hash(inputString) {
const encoder = new TextEncoder()
const uint8Array = encoder.encode(inputString)
const m = bytesToWords(uint8Array)
const l = inputString.length * 8
const w = []
let H0 = 1732584193
let H1 = -271733879
let H2 = -1732584194
let H3 = 271733878
let H4 = -1009589776
// Padding
m[l >> 5] |= 0x80 << (24 - (l % 32))
m[(((l + 64) >>> 9) << 4) + 15] = l
for (let i = 0; i < m.length; i += 16) {
const a = H0
const b = H1
const c = H2
const d = H3
const e = H4
for (let j = 0; j < 80; j++) {
if (j < 16) w[j] = m[i + j]
else {
const n = w[j - 3] ^ w[j - 8] ^ w[j - 14] ^ w[j - 16]
w[j] = (n << 1) | (n >>> 31)
}
const t =
((H0 << 5) | (H0 >>> 27)) +
H4 +
(w[j] >>> 0) +
(j < 20
? ((H1 & H2) | (~H1 & H3)) + 1518500249
: j < 40
? (H1 ^ H2 ^ H3) + 1859775393
: j < 60
? ((H1 & H2) | (H1 & H3) | (H2 & H3)) - 1894007588
: (H1 ^ H2 ^ H3) - 899497514)
H4 = H3
H3 = H2
H2 = (H1 << 30) | (H1 >>> 2)
H1 = H0
H0 = t
}
H0 += a
H1 += b
H2 += c
H3 += d
H4 += e
}
const result = wordsToBytes([H0, H1, H2, H3, H4])
// Convert array of bytes to a hex string
// padStart is used to ensure numbers that are
// less than 16 will still be converted into the two-character format
// For example:
// "5" => "05"
// "a" => "0a"
// "ff" => "ff"
return result.map(b => b.toString(16).padStart(2, '0')).join('')
}

View file

@ -1,4 +1,4 @@
import CryptoJSSHA1 from 'crypto-js/sha1' import { generateSHA1Hash } from '../../shared/utils/sha1'
import { debugging, debugConsole } from '@/utils/debugging' import { debugging, debugConsole } from '@/utils/debugging'
var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }(); var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }();
@ -1286,7 +1286,7 @@ export const { Doc } = (() => {
var needToRecomputeHash = !this.__lastSubmitTimestamp || (age > RECOMPUTE_HASH_INTERVAL) || (age < 0) var needToRecomputeHash = !this.__lastSubmitTimestamp || (age > RECOMPUTE_HASH_INTERVAL) || (age < 0)
if (needToRecomputeHash || debugging) { if (needToRecomputeHash || debugging) {
// send git hash of current snapshot // send git hash of current snapshot
var sha1 = CryptoJSSHA1("blob " + this.snapshot.length + "\x00" + this.snapshot).toString() var sha1 = generateSHA1Hash("blob " + this.snapshot.length + "\x00" + this.snapshot)
this.__lastSubmitTimestamp = now; this.__lastSubmitTimestamp = now;
} }
} }

View file

@ -0,0 +1,80 @@
import { expect } from 'chai'
import { generateSHA1Hash } from '../../../../frontend/js/shared/utils/sha1'
/**
 * Build a random alphanumeric string.
 *
 * @param {number} length - number of characters to generate
 * @returns {string} `length` characters drawn from A-Z, a-z and 0-9
 */
function generateRandomString(length) {
  const alphabet =
    'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
  const picked = []
  for (let count = 0; count < length; count += 1) {
    const position = Math.floor(Math.random() * alphabet.length)
    picked.push(alphabet.charAt(position))
  }
  return picked.join('')
}
function generateMultipleRandomStrings(numStrings, maxLength) {
const randomStrings = []
for (let i = 0; i < numStrings; i++) {
const length = Math.floor(Math.random() * maxLength)
randomStrings.push(generateRandomString(length))
}
return [...new Set(randomStrings)]
}
describe('sha1', function () {
  describe('generateSHA1Hash', function () {
    // Random inputs shared by the two property-style checks below.
    const strings = generateMultipleRandomStrings(100, 1000)

    it('generates 40 base16 characters', function () {
      strings.forEach(str => {
        expect(generateSHA1Hash(str)).to.match(/^[\da-f]{40}$/)
      })
    })

    it("doesn't have collisions on a small set", function () {
      const digests = strings.map(str => generateSHA1Hash(str))
      expect(new Set(digests).size).to.equal(strings.length)
    })

    // Fixed inputs with their expected digests; one test case is registered
    // per entry, in this order.
    const knownDigests = [
      {
        title: 'sample string 1',
        input: 'sample string 1',
        digest: '135028161629af5901ea2f15554730dc0de38a01',
      },
      {
        title: 'sample string 2',
        input: 'sample string 2',
        digest: 'db9460374e49a7c737b609c2fb37302381f345d6',
      },
      {
        title: 'abc',
        input: 'abc',
        digest: 'a9993e364706816aba3e25717850c26c9cd0d89d',
      },
      {
        title: 'generates a sha1 for an empty string',
        input: '',
        digest: 'da39a3ee5e6b4b0d3255bfef95601890afd80709',
      },
      {
        title: 'abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq',
        input: 'abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq',
        digest: '84983e441c3bd26ebaae4aa1f95129e5e54670f1',
      },
      {
        title:
          'abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu',
        input:
          'abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu',
        digest: 'a49b2446a02c645bf419f995b67091253a04a259',
      },
    ]

    for (const { title, input, digest } of knownDigests) {
      it(title, function () {
        expect(generateSHA1Hash(input)).to.equal(digest)
      })
    }
  })
})