Big steps

This commit is contained in:
Nehemiah of Zebulun 2023-08-27 16:20:58 +02:00
parent bf94132c1c
commit 372d684650
16 changed files with 387 additions and 52 deletions

View File

@ -42,6 +42,8 @@ dependencies {
implementation("com.google.android.material:material:1.9.0")
implementation("androidx.room:room-common:2.5.2")
implementation("androidx.room:room-ktx:2.5.2")
implementation("androidx.preference:preference-ktx:1.2.1")
implementation("com.squareup.okhttp3:okhttp:5.0.0-alpha.10")
testImplementation("junit:junit:4.13.2")
androidTestImplementation("androidx.test.ext:junit:1.1.5")
androidTestImplementation("androidx.test.espresso:espresso-core:3.5.1")

View File

@ -1,6 +1,10 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
<uses-permission android:name="android.permission.INTERNET" />
<uses-permission android:name="android.permission.RECORD_AUDIO" />
<uses-permission android:name="android.permission.POST_NOTIFICATIONS" />
<application
android:allowBackup="true"
android:dataExtractionRules="@xml/data_extraction_rules"
@ -22,5 +26,16 @@
</intent-filter>
<meta-data android:name="android.view.im" android:resource="@xml/method" />
</service>
<activity
android:name=".preferences.PreferencesActivity"
android:label="@string/app_preferences_name"
android:exported="false"
android:theme="@style/Theme.AppCompat.DayNight">
<intent-filter>
<action android:name="android.intent.action.MAIN" />
</intent-filter>
</activity>
</application>
</manifest>

View File

@ -1,7 +1,9 @@
package net.mezimmah.wkt9
import android.annotation.SuppressLint
import android.content.Intent
import android.inputmethodservice.InputMethodService
import android.media.MediaRecorder
import android.text.InputType
import android.util.Log
import android.view.KeyEvent
@ -10,6 +12,7 @@ import android.view.ViewConfiguration
import android.view.inputmethod.EditorInfo
import android.widget.LinearLayout
import android.widget.TextView
import android.widget.Toast
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.Job
@ -21,6 +24,7 @@ import net.mezimmah.wkt9.db.AppDatabase
import net.mezimmah.wkt9.inputmode.InputMode
import net.mezimmah.wkt9.inputmode.AlphaInputMode
import net.mezimmah.wkt9.inputmode.NumericInputMode
import net.mezimmah.wkt9.inputmode.Status
import net.mezimmah.wkt9.inputmode.WordInputMode
import net.mezimmah.wkt9.inputmode.WKT9InputMode
import net.mezimmah.wkt9.keypad.KeyCodeMapping
@ -28,6 +32,9 @@ import net.mezimmah.wkt9.keypad.KeyEventResult
import net.mezimmah.wkt9.keypad.KeyLayout
import net.mezimmah.wkt9.keypad.Keypad
import net.mezimmah.wkt9.t9.T9
import net.mezimmah.wkt9.voice.Whisper
import okio.IOException
import java.io.File
import java.lang.StringBuilder
class WKT9: InputMethodService() {
@ -39,9 +46,10 @@ class WKT9: InputMethodService() {
private lateinit var settingDao: SettingDao
// Coroutines
private val job = SupervisorJob()
private val scope = CoroutineScope(Dispatchers.Main + job)
private val queryScope = CoroutineScope(Dispatchers.Main + SupervisorJob())
private var queryJob: Job? = null
private val ioScope = CoroutineScope(Dispatchers.IO + SupervisorJob())
private var ioJob: Job? = null
private var cursorPosition = 0
private var longPressTimeout = 700
@ -62,9 +70,16 @@ class WKT9: InputMethodService() {
private var composing = false
private val candidates: MutableList<String> = mutableListOf()
private var candidateIndex = 0
private var sentenceStart = false
// UI
private lateinit var inputView: View
private var toast: Toast? = null
// Whisper
private val whisper: Whisper = Whisper()
private var recorder: MediaRecorder? = null
private var recording: File? = null
override fun onCreate() {
Log.d(tag, "WKT9 is loading")
@ -145,6 +160,9 @@ class WKT9: InputMethodService() {
val inputType = attribute?.inputType?.and(InputType.TYPE_MASK_CLASS) ?: 0
cursorPosition = attribute?.initialSelEnd ?: 0
sentenceStart =
if (cursorPosition == 0) true
else isSentenceStart()
when (inputType) {
InputType.TYPE_CLASS_DATETIME,
@ -179,15 +197,6 @@ class WKT9: InputMethodService() {
)
}
/**
 * Abandons the composition in progress.
 *
 * Clears the [composing] flag, replaces the composing text with an empty
 * string and then finishes the composing region on the input connection.
 */
private fun cancelComposing() {
    composing = false

    // The connection is dereferenced null-safely, as everywhere else in this
    // class: without a connection there is simply nothing to cancel.
    currentInputConnection?.let { connection ->
        connection.setComposingText("", 1)
        connection.finishComposingText()
    }
}
private fun clearCandidates() {
clearCandidateUI()
@ -201,8 +210,8 @@ class WKT9: InputMethodService() {
candidatesView.removeAllViews()
}
/**
 * Marks the region [start]..[end] as composing, composes [text] into it and
 * finishes the composition.
 *
 * The three steps are chained with `&&`, so a later step only runs when the
 * previous one reported success.
 *
 * @param text the text to compose into the marked region.
 * @param start start offset handed to markComposingRegion.
 * @param end end offset handed to markComposingRegion.
 * @param cursorPosition cursor position handed to composeText; defaults to 1,
 *   so both three-argument and four-argument callers are served.
 * @return true only when all three steps returned true.
 */
private fun commitText(text: CharSequence, start: Int, end: Int, cursorPosition: Int = 1): Boolean {
    return markComposingRegion(start, end) && composeText(text, cursorPosition) && finishComposingText()
}
private fun composeText(text: CharSequence, cursorPosition: Int = 1): Boolean {
@ -213,12 +222,17 @@ class WKT9: InputMethodService() {
/**
 * Asks the current input connection (if any) to delete text around the
 * cursor, then re-evaluates [sentenceStart].
 *
 * @param beforeCursor length passed to deleteSurroundingText for the text before the cursor.
 * @param afterCursor length passed to deleteSurroundingText for the text after the cursor.
 */
private fun deleteText(beforeCursor: Int, afterCursor: Int) {
    val connection = currentInputConnection

    connection?.deleteSurroundingText(beforeCursor, afterCursor)

    // The text in front of the cursor may have changed, so refresh the flag
    sentenceStart = isSentenceStart()
}
// Todo: inputType
private fun enableInputMode(mode: WKT9InputMode, inputType: Int) {
lastInputMode = mode
if (inputType.and(InputType.TYPE_TEXT_VARIATION_EMAIL_ADDRESS) == InputType.TYPE_TEXT_VARIATION_EMAIL_ADDRESS)
Log.d(tag, "InputConnection expects email address")
inputMode = when(mode) {
WKT9InputMode.ALPHA -> alphaInputMode
WKT9InputMode.NUMERIC -> numericInputMode
@ -229,22 +243,47 @@ class WKT9: InputMethodService() {
/**
 * Finishes the composition in progress, if there is one.
 *
 * @return false when nothing is being composed or no input connection is
 *   available; otherwise whatever the connection's finishComposingText reports.
 */
private fun finishComposingText(): Boolean {
    if (!composing) return false

    composing = false
    sentenceStart = isSentenceStart()

    val connection = currentInputConnection ?: return false

    return connection.finishComposingText()
}
/**
 * Starts an ACTION_MAIN / CATEGORY_HOME activity in a new task.
 */
private fun goHome() {
    val homeIntent = Intent(Intent.ACTION_MAIN).apply {
        addCategory(Intent.CATEGORY_HOME)
        flags = Intent.FLAG_ACTIVITY_NEW_TASK
    }

    startActivity(homeIntent)
}
/**
 * Applies every action requested by an input mode, in a fixed order.
 *
 * Order: finish/start composing, code word update, candidates, deletion,
 * go home, left/right navigation and finally record/transcribe.
 *
 * @param res the result produced by the active input mode for a key event.
 * @return the `consumed` flag of [res].
 */
private fun handleKeyEventResult(res: KeyEventResult): Boolean {
    if (res.finishComposing) finishComposingText()
    if (res.startComposing) markComposingRegion()

    // Null or empty payloads are ignored
    res.codeWord?.takeIf { it.isNotEmpty() }?.let { onCodeWordUpdate(it) }
    res.candidates?.takeIf { it.isNotEmpty() }?.let { onCandidates(it) }

    val wantsDelete = res.deleteBeforeCursor > 0 || res.deleteAfterCursor > 0

    if (wantsDelete) onDelete(res.deleteBeforeCursor, res.deleteAfterCursor)

    if (res.goHome) goHome()
    if (res.left) onLeft()
    if (res.right) onRight()
    if (res.record) onRecord()
    if (res.transcribe) onTranscribe()

    return res.consumed
}
/**
 * Tells whether the cursor sits at the beginning of a sentence.
 *
 * That is the case when [cursorPosition] is 0, or when the (up to) 10
 * characters before the cursor are blank or end, ignoring trailing
 * whitespace, in '.', '!' or '?'.
 *
 * @return false when the text before the cursor cannot be obtained.
 */
private fun isSentenceStart(): Boolean {
    if (cursorPosition == 0) return true

    val preceding = currentInputConnection?.getTextBeforeCursor(10, 0) ?: return false
    val trimmed = preceding.trimEnd()

    return trimmed.isEmpty() || trimmed.last() in ".!?"
}
private fun loadCandidates(highLight: Int? = null) {
val candidatesView = inputView.findViewById<LinearLayout>(R.id.suggestions)
@ -284,7 +323,7 @@ class WKT9: InputMethodService() {
clearCandidates()
queryJob?.cancel()
queryJob = scope.launch {
queryJob = queryScope.launch {
val hasCandidates = queryT9Candidates(codeWord, 10)
if (!hasCandidates) return@launch
@ -311,6 +350,41 @@ class WKT9: InputMethodService() {
composeText(candidates[candidateIndex])
}
/**
 * Starts recording audio into a fresh temporary file in the cache directory.
 *
 * Does nothing while a recording is already running. [recorder] is only
 * assigned once the recorder has actually started: when anything fails, the
 * recorder is released and the temporary file removed, so a later attempt can
 * start from a clean state and [onTranscribe] never sees a recorder that was
 * not started.
 */
private fun onRecord() {
    // A recorder instance means a recording is already in progress
    if (recorder != null) return

    clearCandidates()

    // Drop whatever is left of the previous recording
    recording?.delete()
    recording = null

    // Toast settings
    val text = "Recording now.\nRelease the button to start transcribing."
    val duration = Toast.LENGTH_SHORT

    // NOTE(review): the MediaRecorder(Context) constructor needs API level 31;
    // confirm this matches the project's minSdk.
    val mediaRecorder = MediaRecorder(this)

    try {
        val output = File.createTempFile("recording", ".3gp", cacheDir)

        recording = output

        mediaRecorder.setAudioSource(MediaRecorder.AudioSource.VOICE_RECOGNITION)
        mediaRecorder.setOutputFormat(MediaRecorder.OutputFormat.THREE_GPP)
        mediaRecorder.setAudioEncoder(MediaRecorder.AudioEncoder.AMR_NB)
        mediaRecorder.setOutputFile(output)
        mediaRecorder.prepare()
        mediaRecorder.start()

        recorder = mediaRecorder

        toast?.cancel()
        toast = Toast.makeText(this, text, duration).apply {
            show()
        }
    } catch (e: Exception) {
        Log.d(tag, "Failed to start recording", e)

        // Free the native recorder and the useless output file
        mediaRecorder.release()
        recording?.delete()
        recording = null
    }
}
private fun onRight() {
if (candidates.isEmpty()) return
@ -323,11 +397,45 @@ class WKT9: InputMethodService() {
composeText(candidates[candidateIndex])
}
/**
 * Stops the running recording and sends it to the speech-to-text server.
 *
 * Does nothing when no recording is running. The recorder is always released,
 * also when stopping fails. When stopping fails or there is no recording file,
 * the file is discarded and nothing is sent. Otherwise the transcription
 * returned by the server is committed at [cursorPosition].
 */
private fun onTranscribe() {
    val recorder = this.recorder ?: return

    this.recorder = null

    // stop() throws a RuntimeException when no valid audio was received, for
    // instance when the key is released right after recording started.
    val stopped = try {
        recorder.stop()

        true
    } catch (e: RuntimeException) {
        Log.d(tag, "Failed to stop recording", e)

        false
    } finally {
        recorder.release()
    }

    val audio = recording

    if (!stopped || audio == null) {
        audio?.delete()
        recording = null

        return
    }

    val text = "Sending recording to speech-to-text server for transcription."
    val duration = Toast.LENGTH_SHORT

    toast?.cancel()
    toast = Toast.makeText(this, text, duration).apply {
        show()
    }

    ioJob?.cancel()
    ioJob = ioScope.launch {
        try {
            val transcription = whisper.run(audio)

            // NOTE(review): this runs on the IO dispatcher while commitText
            // touches state that is otherwise used from the main thread; confirm.
            commitText(transcription, cursorPosition, cursorPosition)
        } catch (e: IOException) {
            Log.d(tag, "A failure occurred in the communication with the speech-to-text server", e)
        }
    }
}
private suspend fun queryT9Candidates(codeWord: StringBuilder, limit: Int = 10): Boolean {
val words = wordDao.findCandidates(codeWord.toString(), limit)
words.forEach {
candidates.add(it.word)
words.forEach { word ->
val candidate =
if (sentenceStart && inputMode?.status == Status.WORD_CAP) word.word.replaceFirstChar { it.uppercase() }
else if (inputMode?.status == Status.WORD_UPPER) word.word.uppercase()
else word.word
candidates.add(candidate)
}
return words.isNotEmpty()

View File

@ -4,6 +4,9 @@ import net.mezimmah.wkt9.keypad.Key
import net.mezimmah.wkt9.keypad.KeyEventResult
class AlphaInputMode: InputMode {
override var status: Status = Status.ALPHA_CAP
private set
override fun onKeyDown(key: Key): KeyEventResult {
return KeyEventResult(consumed = false)
}

View File

@ -4,6 +4,8 @@ import net.mezimmah.wkt9.keypad.Key
import net.mezimmah.wkt9.keypad.KeyEventResult
interface InputMode {
val status: Status
fun onKeyDown(key: Key): KeyEventResult
fun onKeyLongDown(key: Key): KeyEventResult

View File

@ -12,6 +12,9 @@ class NumericInputMode: InputMode {
private val keyCommandResolver: KeyCommandResolver = KeyCommandResolver.getBasic()
private val codeWord = StringBuilder()
override var status: Status = Status.NUM
private set
override fun onKeyDown(key: Key): KeyEventResult {
return when(keyCommandResolver.getCommand(key)) {
Command.CHARACTER -> buildCodeWord(key)

View File

@ -0,0 +1,11 @@
package net.mezimmah.wkt9.inputmode
/**
 * Status reported by an input mode.
 *
 * The WORD_* entries are used by the word input mode, the ALPHA_* entries by
 * the alpha input mode and [NUM] by the numeric input mode.
 *
 * @property idx numeric index of the status; the values mirror the declaration order.
 */
enum class Status(val idx: Int) {
    WORD(idx = 0),
    WORD_CAP(idx = 1),
    WORD_UPPER(idx = 2),
    ALPHA(idx = 3),
    ALPHA_CAP(idx = 4),
    ALPHA_UPPER(idx = 5),
    NUM(idx = 6),
}

View File

@ -6,7 +6,6 @@ import net.mezimmah.wkt9.keypad.Key
import net.mezimmah.wkt9.keypad.KeyCommandResolver
import net.mezimmah.wkt9.keypad.KeyEventResult
import java.lang.StringBuilder
import java.lang.annotation.Native
class WordInputMode: InputMode {
private val tag = "WKT9"
@ -16,66 +15,54 @@ class WordInputMode: InputMode {
private var keyIndex = 0
private var lastKey: Key? = null
override var status: Status = Status.WORD_CAP
private set
init {
Log.d(tag, "Started word input mode.")
}
/**
 * Handles a regular key press in word input mode.
 *
 * Updates the key statistics, resolves the command bound to [key] once and
 * dispatches it to the matching handler.
 *
 * @param key the key that went down.
 * @return the result of the matching handler; `KeyEventResult(false)` for
 *   BACK and a default [KeyEventResult] for commands without a handler.
 */
override fun onKeyDown(key: Key): KeyEventResult {
    keyStats(key)

    // Resolve once; the same command is logged and dispatched
    val command = keyCommandResolver.getCommand(key)

    Log.d(tag, "Command: $command")

    return when (command) {
        Command.BACK -> KeyEventResult(false)
        Command.CHARACTER -> buildCodeWord(key)
        // Command.SELECT -> true
        Command.DELETE -> deleteCharacter()
        Command.SPACE -> finalizeWordOrSentence()
        Command.LEFT -> navigateLeft()
        Command.RIGHT -> navigateRight()
        // Command.CYCLE_CANDIDATES -> cycleCandidates()
        else -> KeyEventResult()
    }
}
/**
 * Handles a long key press in word input mode.
 *
 * Resolves the long-press command bound to [key] and dispatches it, so that
 * RECORD actually reaches [record] instead of being cut off by an early return.
 *
 * @param key the key that is being held down.
 * @return the result of [record] for RECORD, `KeyEventResult(true)` otherwise.
 */
override fun onKeyLongDown(key: Key): KeyEventResult {
    val command = keyCommandResolver.getCommand(key = key, longPress = true)

    Log.d(tag, "Command: $command")

    return when (command) {
        Command.RECORD -> record()
        else -> KeyEventResult(true)
    }
}
/**
 * Handles a key that keeps firing while it is held down.
 *
 * @param key the key that is repeating.
 * @param repeat the repeat count handed on to the resolver and the handlers.
 * @return the result of [goHome] for HOME, of deleteCharacter for DELETE and
 *   a default [KeyEventResult] otherwise.
 */
override fun onKeyDownRepeatedly(key: Key, repeat: Int): KeyEventResult {
    val command = keyCommandResolver.getCommand(key, repeat = repeat)

    if (command == Command.HOME) return goHome(repeat)
    if (command == Command.DELETE) return deleteCharacter(repeat)

    return KeyEventResult()
}
/**
 * Handles the release of a regular key press in word input mode.
 *
 * Resolves the "after" command bound to [key] and dispatches it, so that BACK
 * actually reaches [goBack] instead of being cut off by an early return.
 *
 * @param key the key that was released.
 * @return the result of [goBack] for BACK, a default [KeyEventResult] otherwise.
 */
override fun afterKeyDown(key: Key): KeyEventResult {
    val command = keyCommandResolver.getCommand(key, after = true)

    Log.d(tag, "Command: $command")

    return when (command) {
        Command.BACK -> goBack()
        else -> KeyEventResult()
    }
}
/**
 * Handles the release of a long key press in word input mode.
 *
 * Resolves the "after long press" command bound to [key] and dispatches it, so
 * that TRANSCRIBE actually reaches [transcribe] instead of being cut off by an
 * early return.
 *
 * @param key the key that was released.
 * @param keyDownMS passed in by the caller; not used by this implementation.
 * @return the result of [transcribe] for TRANSCRIBE, a default
 *   [KeyEventResult] otherwise.
 */
override fun afterKeyLongDown(key: Key, keyDownMS: Long): KeyEventResult {
    return when (keyCommandResolver.getCommand(key, after = true, longPress = true)) {
        Command.TRANSCRIBE -> transcribe()
        else -> KeyEventResult()
    }
}
private fun buildCodeWord(key: Key): KeyEventResult {
@ -115,6 +102,27 @@ class WordInputMode: InputMode {
)
}
/**
 * Resets the word state and asks the caller to finish composing.
 *
 * The event is reported as not consumed.
 */
private fun goBack(): KeyEventResult {
    reset()

    return KeyEventResult(consumed = false, finishComposing = true)
}
/**
 * Requests a jump to the home screen, but only while [repeat] is at most 1.
 *
 * @param repeat the repeat count of the key event.
 * @return `KeyEventResult(true)` without further action when [repeat] is
 *   above 1; otherwise a consumed result that finishes composing and sets
 *   `goHome`, after the word state has been reset.
 */
private fun goHome(repeat: Int): KeyEventResult {
    return if (repeat > 1) {
        KeyEventResult(true)
    } else {
        reset()

        KeyEventResult(consumed = true, finishComposing = true, goHome = true)
    }
}
private fun keyStats(key: Key) {
when (key != lastKey) {
true -> {
@ -137,4 +145,28 @@ class WordInputMode: InputMode {
/** Builds a result that only sets the `right` flag. */
private fun navigateRight(): KeyEventResult = KeyEventResult(right = true)
/**
 * Clears the code word and builds a consumed result that finishes composing
 * and sets the `record` flag.
 */
private fun record(): KeyEventResult {
    codeWord.clear()

    return KeyEventResult(consumed = true, finishComposing = true, record = true)
}
/**
 * Puts the word state back to its initial values: no last key, key index 0,
 * the next key counts as new and the code word is empty.
 */
private fun reset() {
    lastKey = null
    keyIndex = 0
    newKey = true

    codeWord.clear()
}
/** Builds a consumed result that sets the `transcribe` flag. */
private fun transcribe(): KeyEventResult = KeyEventResult(consumed = true, transcribe = true)
}

View File

@ -51,8 +51,6 @@ class KeyCommandResolver (
)),
onLong = HashMap(mapOf(
Key.BACK to Command.HOME,
Key.N0 to Command.NUMBER,
Key.N1 to Command.NUMBER,
Key.N2 to Command.NUMBER,
@ -78,6 +76,7 @@ class KeyCommandResolver (
)),
onRepeat = HashMap(mapOf(
Key.BACK to Command.HOME,
Key.STAR to Command.DELETE,
))
)

View File

@ -10,6 +10,9 @@ data class KeyEventResult(
val candidates: List<String>? = null,
val deleteBeforeCursor: Int = 0,
val deleteAfterCursor: Int = 0,
val goHome: Boolean = false,
val left: Boolean = false,
val right: Boolean = false
val right: Boolean = false,
val record: Boolean = false,
val transcribe: Boolean = false
)

View File

@ -0,0 +1,17 @@
package net.mezimmah.wkt9.preferences
import android.os.Bundle
import androidx.appcompat.app.AppCompatActivity
import net.mezimmah.wkt9.R
/**
 * Activity that hosts the [PreferencesFragment] inside the
 * `preferences_container` layout.
 */
class PreferencesActivity: AppCompatActivity() {
    /**
     * Sets the container layout and, on first creation only, puts a
     * [PreferencesFragment] into it.
     *
     * @param savedInstanceState null on first creation, the saved state when
     *   the activity is being re-created.
     */
    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.preferences_container)

        // On re-creation the fragment manager restores the fragment by itself;
        // replacing it again would throw away the restored instance.
        if (savedInstanceState != null) return

        supportFragmentManager
            .beginTransaction()
            .replace(R.id.preferences_container, PreferencesFragment())
            .commit()
    }
}

View File

@ -0,0 +1,59 @@
package net.mezimmah.wkt9.preferences
import android.content.SharedPreferences
import android.os.Bundle
import android.Manifest
import android.os.Build
import android.util.Log
import androidx.activity.result.contract.ActivityResultContracts.RequestMultiplePermissions
import androidx.preference.PreferenceFragmentCompat
import androidx.preference.SwitchPreference
import net.mezimmah.wkt9.R
/**
 * Preference screen of WKT9.
 *
 * Listens for changes of the speech-to-text switch: switching it on requests
 * the permissions the feature needs, and the switch is turned off again when
 * any of them is denied.
 */
class PreferencesFragment :
    PreferenceFragmentCompat(),
    SharedPreferences.OnSharedPreferenceChangeListener {

    private val tag = "WKT9"

    // Receives the outcome of the permission request started in onSharedPreferenceChanged
    private val requestPermissionLauncher =
        registerForActivityResult(RequestMultiplePermissions()) { results: Map<String, Boolean> ->
            // A single denied permission is enough to switch the option off again
            if (false in results.values) {
                val speechToTextKey = getString(R.string.preference_setting_speech_to_text_key)

                findPreference<SwitchPreference>(speechToTextKey)?.isChecked = false
            }
        }

    /** Inflates the preference hierarchy from `R.xml.preferences`. */
    override fun onCreatePreferences(savedInstanceState: Bundle?, rootKey: String?) {
        Log.d(tag, "Loading preferences")

        setPreferencesFromResource(R.xml.preferences, rootKey)
    }

    /** Starts listening for preference changes. */
    override fun onResume() {
        super.onResume()

        val preferences = preferenceScreen.sharedPreferences

        preferences?.registerOnSharedPreferenceChangeListener(this)
    }

    /** Stops listening for preference changes. */
    override fun onPause() {
        super.onPause()

        val preferences = preferenceScreen.sharedPreferences

        preferences?.unregisterOnSharedPreferenceChangeListener(this)
    }

    /**
     * Requests the permissions for speech to text when that switch is on.
     *
     * RECORD_AUDIO is always requested, POST_NOTIFICATIONS only on Android 13
     * (Tiramisu) and later. Changes of other keys are ignored.
     */
    override fun onSharedPreferenceChanged(p0: SharedPreferences?, key: String?) {
        val speechToTextKey = getString(R.string.preference_setting_speech_to_text_key)

        if (key != speechToTextKey) return

        val enabled = findPreference<SwitchPreference>(speechToTextKey)?.isChecked == true

        if (!enabled) return

        val permissions = mutableListOf(Manifest.permission.RECORD_AUDIO)

        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.TIRAMISU) {
            permissions.add(Manifest.permission.POST_NOTIFICATIONS)
        }

        requestPermissionLauncher.launch(permissions.toTypedArray())
    }
}

View File

@ -0,0 +1,41 @@
package net.mezimmah.wkt9.voice
import okhttp3.MediaType.Companion.toMediaType
import okhttp3.MultipartBody
import okhttp3.OkHttpClient
import okhttp3.Request
import okhttp3.RequestBody.Companion.asRequestBody
import java.io.File
import java.io.IOException
import java.util.concurrent.TimeUnit
/**
 * Client for a Whisper speech-to-text HTTP server.
 *
 * The defaults reproduce the previously hard-coded values, so `Whisper()`
 * behaves as before; other values can be supplied per instance, for example
 * a server URL taken from the preferences.
 *
 * @param url endpoint the recording is posted to.
 * @param language value of the `language` form field.
 * @param modelSize value of the `model_size` form field.
 */
class Whisper(
    private val url: String = "https://voice.mezimmah.net",
    private val language: String = "en",
    private val modelSize: String = "tiny.en",
) {
    private val client: OkHttpClient = OkHttpClient.Builder()
        .connectTimeout(2, TimeUnit.SECONDS)
        .writeTimeout(5, TimeUnit.SECONDS)
        .readTimeout(25, TimeUnit.SECONDS)
        .callTimeout(32, TimeUnit.SECONDS)
        .build()

    /**
     * Uploads [recording] as a multipart form and returns the response body.
     *
     * The call is synchronous and blocks until the server answers or a
     * timeout strikes, so it must not run on the main thread.
     *
     * @param recording the audio file, sent as `audio/3gpp` under the name `recording.3gp`.
     * @return the trimmed response body.
     * @throws IOException when the request fails or the response status is not successful.
     */
    @Throws(IOException::class)
    fun run(recording: File): String {
        val mediaType = "audio/3gpp".toMediaType()
        val requestBody = MultipartBody.Builder()
            .setType(MultipartBody.FORM)
            .addFormDataPart("language", language)
            .addFormDataPart("model_size", modelSize)
            .addFormDataPart("files", "recording.3gp", recording.asRequestBody(mediaType))
            .build()

        val request = Request.Builder()
            .url(url)
            .post(requestBody)
            .build()

        // use { } closes the response in every case
        return client.newCall(request).execute().use { response ->
            if (!response.isSuccessful) throw IOException("Unexpected code $response")

            response.body.string().trim()
        }
    }
}

View File

@ -0,0 +1,9 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Empty full-size container. PreferencesActivity uses this layout as its
     content view and places a PreferencesFragment into the view with the id
     preferences_container. -->
<FrameLayout
xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:tools="http://schemas.android.com/tools"
android:id="@+id/preferences_container"
android:layout_width="match_parent"
android:layout_height="match_parent"
tools:context=".preferences.PreferencesActivity">
</FrameLayout>

View File

@ -1,3 +1,15 @@
<resources>
<string name="app_name">WKT9</string>
<string name="app_preferences_name">WKT9 Preferences</string>
<string name="preference_category_speech_to_text_name">Speech to Text</string>
<string name="preference_setting_speech_to_text_key">speech_to_text</string>
<string name="preference_setting_speech_to_text_title">Enable Speech to Text</string>
<string name="preference_setting_speech_to_text_summary">For this feature to work, WKT9 needs permission to show notifications and record audio. You will be asked to grant these permissions if you haven\'t granted them already.</string>
<string name="preference_setting_whisper_url_key">whisper_url</string>
<string name="preference_setting_whisper_url_title">Whisper Server URL</string>
<string name="preference_setting_whisper_url_summary">Provide a URL to the Whisper server.</string>
</resources>

View File

@ -0,0 +1,19 @@
<?xml version="1.0" encoding="utf-8"?>
<PreferenceScreen
    xmlns:app="http://schemas.android.com/apk/res-auto">

    <!-- Speech to text: the switch enables the feature, the server URL can
         only be edited while the switch is on (see app:dependency). Both
         preferences are children of the category, so they belong to the
         group whose title is shown above them. -->
    <PreferenceCategory
        app:title="@string/preference_category_speech_to_text_name">

        <SwitchPreference
            app:key="@string/preference_setting_speech_to_text_key"
            app:title="@string/preference_setting_speech_to_text_title"
            app:summary="@string/preference_setting_speech_to_text_summary" />

        <EditTextPreference
            app:key="@string/preference_setting_whisper_url_key"
            app:title="@string/preference_setting_whisper_url_title"
            app:summary="@string/preference_setting_whisper_url_summary"
            app:dependency="@string/preference_setting_speech_to_text_key" />

    </PreferenceCategory>
</PreferenceScreen>